Skip to content

Commit 01b016f

Browse files
Apply fixes from Simple project
1 parent 94e4a77 commit 01b016f

34 files changed

+1593
-73
lines changed

seaofnodes/src/main/java/com/compilerprogramming/ezlang/compiler/codegen/BuildLRG.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ public static boolean run(int round, RegAlloc alloc) {
6464
}
6565

6666
// MultiNodes have projections which set registers
67-
if( n instanceof MultiNode )
67+
if( n instanceof MultiNode && !(n instanceof CFGNode) )
6868
for( Node proj : n.outs() )
6969
if( proj instanceof MachNode )
7070
defLRG(alloc,proj);

seaofnodes/src/main/java/com/compilerprogramming/ezlang/compiler/codegen/CodeGen.java

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,24 @@ public int getUID() {
9090
public int getALIAS() { return _alias++; }
9191

9292

93+
// idepths are cached and valid until *inserting* CFG edges (deleting is
94+
// OK). This happens with inlining, which bumps the version to bulk
95+
// invalidate the idepth caches.
96+
private int _iDepthVersion = 0;
97+
public void invalidateIDepthCaches() { _iDepthVersion++; }
98+
public boolean validIDepth(int idepth) {
99+
if( idepth==0 ) return false;
100+
if( _iDepthVersion==0 ) return true;
101+
return (idepth%100)==_iDepthVersion;
102+
}
103+
public int iDepthAt(int idepth) {
104+
return 100*idepth+_iDepthVersion;
105+
}
106+
public int iDepthFrom(int idepth) {
107+
assert idepth==0 || validIDepth(idepth);
108+
return idepth+100;
109+
}
110+
93111
// Popular visit bitset, declared here, so it gets reused all over
94112
public final BitSet _visit = new BitSet();
95113
public BitSet visit() { assert _visit.isEmpty(); return _visit; }

seaofnodes/src/main/java/com/compilerprogramming/ezlang/compiler/codegen/Encoding.java

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -323,7 +323,6 @@ private void writeEncodings() {
323323
if( !(bb instanceof MachNode mach0) )
324324
_opStart[bb._nid] = _bits.size();
325325
else if( bb instanceof FunNode fun ) {
326-
padN(16,_bits);
327326
_fun = fun; // Currently encoding function
328327
_opStart[bb._nid] = _bits.size();
329328
mach0.encoding( this );
@@ -337,7 +336,6 @@ else if( bb instanceof FunNode fun ) {
337336
}
338337
}
339338
}
340-
padN(16,_bits);
341339
}
342340

343341
// --------------------------------------------------

seaofnodes/src/main/java/com/compilerprogramming/ezlang/compiler/codegen/GlobalCodeMotion.java

Lines changed: 35 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,8 @@ private static void breadth(Node stop, Node[] ns, CFGNode[] late) {
167167
!(memuse instanceof NewNode) &&
168168
// Load-use directly defines memory
169169
(memuse._type instanceof SONTypeMem ||
170+
// Load-use is a Call, which is also treated as defining memory
171+
memuse instanceof CallNode ||
170172
// Load-use indirectly defines memory
171173
(memuse._type instanceof SONTypeTuple tt && tt._types[ld._alias] instanceof SONTypeMem)) )
172174
continue outer;
@@ -200,6 +202,33 @@ private static void _doSchedLate(Node n, Node[] ns, CFGNode[] late) {
200202
if( n instanceof MemOpNode load && load._isLoad )
201203
lca = find_anti_dep(lca,load,early,late);
202204

205+
206+
// Nodes setting a single register and getting killed will stay close
207+
// to the uses, since they will be forced to spill anyway. The kill
208+
// check is very weak, and some may be hoisted only to spill in the RA.
209+
if( n instanceof MachNode mach ) {
210+
RegMask out = mach.outregmap();
211+
if( out!=null && out.size1() ) {
212+
int reg = mach.outregmap().firstReg();
213+
// Look for kills
214+
outer:
215+
for( CFGNode lca2=lca; lca2 != early; lca2 = lca2.idom() ) {
216+
if( lca2 instanceof MachNode mach2 ) {
217+
for( int i=1; i<lca2.nIns(); i++ ) {
218+
RegMask mask = mach2.regmap(i);
219+
if( mask!=null && mask.test(reg) ) {
220+
early = lca2 instanceof IfNode ? lca2.idom() : lca2;
221+
break outer;
222+
}
223+
}
224+
RegMask kill = mach2.killmap();
225+
if( kill != null )
226+
throw Utils.TODO();
227+
}
228+
}
229+
}
230+
}
231+
203232
// Walk up from the LCA to the early, looking for best place. This is
204233
// the lowest execution frequency, approximated by least loop depth and
205234
// deepest control flow.
@@ -231,8 +260,9 @@ private static CFGNode use_block(Node n, Node use, CFGNode[] late) {
231260
// Least loop depth first, then largest idepth
232261
private static boolean better( CFGNode lca, CFGNode best ) {
233262
return lca.loopDepth() < best.loopDepth() ||
234-
lca instanceof NeverNode ||
235-
(lca.idepth() > best.idepth() || best instanceof IfNode);
263+
lca instanceof NeverNode ||
264+
lca.idepth() > best.idepth() ||
265+
best instanceof IfNode;
236266
}
237267

238268
private static CFGNode find_anti_dep(CFGNode lca, MemOpNode load, CFGNode early, CFGNode[] late) {
@@ -249,9 +279,9 @@ private static CFGNode find_anti_dep(CFGNode lca, MemOpNode load, CFGNode early,
249279
lca = anti_dep( load, late[mem._nid], mem.cfg0(), lca, st );
250280
}
251281
break; // Loads do not cause anti-deps on other loads
252-
case CallNode st:
253-
assert late[st._nid]!=null;
254-
lca = anti_dep(load,late[st._nid],st.cfg0(),lca,st);
282+
case CallNode call:
283+
assert late[call._nid]!=null;
284+
lca = anti_dep(load,late[call._nid],call.cfg0(),lca,call);
255285
break;
256286
case PhiNode phi:
257287
// Repeat anti-dep for matching Phi inputs.

seaofnodes/src/main/java/com/compilerprogramming/ezlang/compiler/codegen/IFG.java

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -241,14 +241,12 @@ private static void selfConflict(RegAlloc alloc, Node n, LRG lrg, Node prior) {
241241
private static void mergeLiveOut( RegAlloc alloc, CFGNode priorbb, int i ) {
242242
CFGNode bb = priorbb.cfg(i);
243243
if( bb == null ) return; // Start has no prior
244-
if( !bb.blockHead() ) bb = bb.cfg0();
245-
//if( i==0 && !(bb instanceof StartNode) ) bb = bb.cfg0();
246-
assert bb.blockHead();
244+
while( !bb.blockHead() ) bb = bb.cfg0();
247245

248246
// Lazy get live-out set for bb
249-
IdentityHashMap<LRG, Node> lrgs = BBOUTS.computeIfAbsent( bb, k -> new IdentityHashMap<>() );
247+
IdentityHashMap<LRG, Node> lrgs = BBOUTS.computeIfAbsent( bb, k -> new IdentityHashMap<>() );
250248

251-
for( LRG lrg : TMP.keySet() ) {
249+
for( LRG lrg : TMP.keySet() ) {
252250
Node def = TMP.get(lrg);
253251
// Effective def comes from phi input from prior block
254252
if( def instanceof PhiNode phi && phi.cfg0()==priorbb ) {

seaofnodes/src/main/java/com/compilerprogramming/ezlang/compiler/codegen/Machine.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,12 @@ abstract public class Machine {
1616
public abstract long callerSave();
1717
// List of never-save registers, e.g. RSP or a ZERO register if you have one
1818
public abstract long neverSave();
19-
// Call Argument Mask
20-
public abstract RegMask callArgMask(SONTypeFunPtr tfp, int arg);
19+
// Call Argument Mask. Passed in the function signature and argument
20+
// number (2-based; 0 is for control and 1 for memory). Also passed in a 0
21+
// for the function itself, or for *outgoing* calls, the maximum stack slot
22+
// given to the incoming function arguments (stack slots reserved for
23+
// incoming arguments).
24+
public abstract RegMask callArgMask(SONTypeFunPtr tfp, int arg, int maxArgSlot);
2125
// Return register mask, based on signature (GPR vs FPR)
2226
public abstract RegMask retMask(SONTypeFunPtr tfp);
2327
// Return PC register

seaofnodes/src/main/java/com/compilerprogramming/ezlang/compiler/codegen/RegAlloc.java

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -261,7 +261,7 @@ boolean splitEmptyMaskSimple( byte round, LRG lrg ) {
261261
}
262262

263263
// Single-def live range with an empty mask. There are many single-reg
264-
// uses. Theory is there's many repeats if the same reg amongst the uses.
264+
// uses. Theory is there's many repeats of the same reg amongst the uses.
265265
// In of splitting once per use, start by splitting into groups based on
266266
// required input register.
267267
boolean splitEmptyMaskByUse( byte round, LRG lrg ) {
@@ -270,35 +270,45 @@ boolean splitEmptyMaskByUse( byte round, LRG lrg ) {
270270
// Look at each use, and break into non-overlapping register classes.
271271
Ary<RegMask> rclass = new Ary<>(RegMask.class);
272272
boolean done=false;
273+
int ncalls=0;
273274
while( !done ) {
274275
done = true;
275276
for( Node use : def._outputs )
276-
if( use instanceof MachNode mach )
277+
if( use instanceof MachNode mach ) {
278+
if( mach instanceof CallNode ) ncalls++;
277279
for( int i=1; i<use.nIns(); i++ )
278280
if( use.in(i)==def )
279281
done = putIntoRegClass( rclass, mach.regmap(i) );
282+
}
280283
}
281284

282-
// See how many register classes we split into
283-
if( rclass._len <= 1 ) return false;
285+
// See how many register classes we split into. Generally not
286+
// productive to split like this across calls, which are going to kill
287+
// all registers anyways.
288+
if( rclass._len <= 1 || ncalls > 1 ) return false;
284289

285-
// Split by class
290+
// Split by class
291+
Ary<Node> ns = new Ary<>(Node.class);
286292
for( RegMask rmask : rclass ) {
293+
ns.addAll(def._outputs);
287294
Node split = makeSplit(def,"popular",round,lrg);
288295
split.insertAfter( def );
289296
if( split.nIns()>1 ) split.setDef(1,def);
290297
// all uses by class to split
291-
for( int j=0; j < def._outputs._len; j++ ) {
292-
Node use = def._outputs.at(j);
298+
for( Node use : ns ) {
293299
if( use instanceof MachNode mach && use!=split ) {
294300
// Check all use inputs for n, in case there's several
295301
for( int i = 1; i < use.nIns(); i++ )
296302
// Find a def input, and check register class
297-
if( use.in( i ) == def && mach.regmap( i ).overlap( rmask ) )
298-
// Modify use to use the split version specialized to this rclass
299-
{ use.setDef( i, split ); j--; break; }
303+
if( use.in( i ) == def ) {
304+
RegMask m = mach.regmap( i );
305+
if( m!=null && mach.regmap( i ).overlap( rmask ) )
306+
// Modify use to use the split version specialized to this rclass
307+
use.setDefOrdered( i, split );
308+
}
300309
}
301310
}
311+
ns.clear();
302312
}
303313
return true;
304314
}

seaofnodes/src/main/java/com/compilerprogramming/ezlang/compiler/codegen/RegMask.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,11 @@
44
import com.compilerprogramming.ezlang.compiler.SB;
55

66
/** RegMask
7-
* A "register mask" - 1 bit set for each allowed register. In addition
7+
* A "register mask" - 1 bit set for each allowed register. In addition,
88
* "stack slot" registers may be allowed, effectively making the set infinite.
99
* <p>
1010
* For smaller and simpler machines it suffices to make such masks an i64 or
11-
* i128 (64 or 128 bit integers), and this presentation is by far the better
11+
* i128 (64- or 128-bit integers), and this presentation is by far the better
1212
* way to go... if all register allocations can fit in this bit limitation.
1313
* The allocator will need bits for stack-based parameters and for splits
1414
* which cannot get a register. For a 32-register machine like the X86, add 1
@@ -28,7 +28,7 @@ public class RegMask {
2828

2929
public RegMask(int bit) {
3030
if( bit < 64 ) _bits0 = 1L<<bit;
31-
else _bits1 = 1L<<(bit=64);
31+
else _bits1 = 1L<<(bit-64);
3232
}
3333
public RegMask(long bits ) { _bits0 = bits; }
3434
public RegMask(long bits0, long bits1 ) { _bits0 = bits0; _bits1 = bits1; }

seaofnodes/src/main/java/com/compilerprogramming/ezlang/compiler/nodes/CFGNode.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package com.compilerprogramming.ezlang.compiler.nodes;
22

33
import com.compilerprogramming.ezlang.compiler.Compiler;
4+
import com.compilerprogramming.ezlang.compiler.codegen.CodeGen;
45
import com.compilerprogramming.ezlang.compiler.sontypes.*;
56
import java.util.BitSet;
67

@@ -68,7 +69,9 @@ public CFGNode uctrlSkipEmpty() {
6869
* See {@link <a href="https://en.wikipedia.org/wiki/Dominator_(graph_theory)">...</a>}
6970
*/
7071
public int _idepth;
71-
public int idepth() { return _idepth==0 ? (_idepth=idom().idepth()+1) : _idepth; }
72+
public int idepth() {
73+
return CodeGen.CODE.validIDepth(_idepth) ? _idepth : (_idepth=CodeGen.CODE.iDepthFrom(idom().idepth()));
74+
}
7275

7376
// Return the immediate dominator of this Node and compute dom tree depth.
7477
public CFGNode idom(Node dep) { return cfg(0); }

seaofnodes/src/main/java/com/compilerprogramming/ezlang/compiler/nodes/CallEndNode.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,8 +74,9 @@ public Node idealize() {
7474
assert fun.in(1) instanceof StartNode && fun.in(2)==call;
7575
// Disallow self-recursive inlining (loop unrolling by another name)
7676
CFGNode idom = call;
77-
while( !(idom instanceof FunNode fun2) )
77+
while( !(idom instanceof FunNode) )
7878
idom = idom.idom();
79+
// Inline?
7980
if( idom != fun ) {
8081
// Trivial inline: rewrite
8182
_folding = true;
@@ -85,6 +86,9 @@ public Node idealize() {
8586
fun.setDef(2,call.ctrl()); // Bypass the Call;
8687
fun.ret().setDef(3,null); // Return is folding also
8788
CodeGen.CODE.addAll(fun._outputs);
89+
// Inlining immediately blows all cached idepth fields past the inline point.
90+
// Bump the global version number, invalidating them en masse.
91+
CodeGen.CODE.invalidateIDepthCaches();
8892
return this;
8993
}
9094
} else {

seaofnodes/src/main/java/com/compilerprogramming/ezlang/compiler/nodes/FunNode.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -114,8 +114,11 @@ public Node idealize() {
114114
return null;
115115
}
116116

117-
// Bypass Region idom, always assume depth == 1, one more than Start
118-
@Override public int idepth() { return (_idepth=1); }
117+
// Bypass Region idom, always assume depth == 1, one more than Start,
118+
// unless folding, then just an ID on input#1
119+
@Override public int idepth() {
120+
return _folding ? super.idepth() : CodeGen.CODE.iDepthAt(1);
121+
}
119122
// Bypass Region idom, always assume idom is Start
120123
@Override public CFGNode idom(Node dep) { return cfg(1); }
121124

seaofnodes/src/main/java/com/compilerprogramming/ezlang/compiler/nodes/LoopNode.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,9 @@ public SONType compute() {
2121
}
2222

2323
// Bypass Region idom, same as the default idom() but using in(1) instead of in(0)
24-
@Override public int idepth() { return _idepth==0 ? (_idepth=idom().idepth()+1) : _idepth; }
24+
public int idepth() {
25+
return CodeGen.CODE.validIDepth(_idepth) ? _idepth : (_idepth=CodeGen.CODE.iDepthFrom(idom().idepth()));
26+
}
2527
// Bypass Region idom, same as the default idom() but using in(1) instead of in(0)
2628
@Override public CFGNode idom(Node dep) { return entry(); }
2729

seaofnodes/src/main/java/com/compilerprogramming/ezlang/compiler/nodes/NewNode.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,8 +92,8 @@ public SONTypeTuple compute() {
9292
private RegMask _retMask;
9393
private RegMask _kills;
9494
public void cacheRegs(CodeGen code) {
95-
_arg2Reg = code._mach.callArgMask(SONTypeFunPtr.CALLOC,2).firstReg();
96-
_arg3Mask = code._mach.callArgMask(SONTypeFunPtr.CALLOC,3);
95+
_arg2Reg = code._mach.callArgMask(SONTypeFunPtr.CALLOC,2,0).firstReg();
96+
_arg3Mask = code._mach.callArgMask(SONTypeFunPtr.CALLOC,3,0);
9797
// Return mask depends on TFP (either GPR or FPR)
9898
_retMask = code._mach.retMask(SONTypeFunPtr.CALLOC);
9999
// Kill mask is all caller-saves, and any mirror stack slots for args

seaofnodes/src/main/java/com/compilerprogramming/ezlang/compiler/nodes/PhiNode.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ public class PhiNode extends Node {
1414
final SONType _declaredType;
1515

1616
public PhiNode(String label, SONType declaredType, Node... inputs) { super(inputs); _label = label; assert declaredType!=null; _declaredType = declaredType; }
17-
public PhiNode(PhiNode phi, String label, SONType declaredType) { super(phi); _label = label; _declaredType = declaredType; }
17+
public PhiNode(PhiNode phi, String label, SONType declaredType) { super(phi); _label = label; _type = _declaredType = declaredType; }
1818
public PhiNode(PhiNode phi) { super(phi); _label = phi._label; _declaredType = phi._declaredType; }
1919

2020
public PhiNode(RegionNode r, Node sample) {

seaofnodes/src/main/java/com/compilerprogramming/ezlang/compiler/nodes/RegionNode.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -137,12 +137,13 @@ boolean hasMidUser(RegionNode r) {
137137

138138
// Immediate dominator of Region is a little more complicated.
139139
@Override public int idepth() {
140-
if( _idepth!=0 ) return _idepth;
140+
if( CodeGen.CODE.validIDepth(_idepth) )
141+
return _idepth;
141142
int d=0;
142143
for( Node n : _inputs )
143144
if( n!=null )
144-
d = Math.max(d,((CFGNode)n).idepth()+1);
145-
return _idepth=d;
145+
d = Math.max(d,CodeGen.CODE.iDepthFrom(((CFGNode)n).idepth()));
146+
return _idepth = d;
146147
}
147148

148149
@Override public CFGNode idom(Node dep) {

seaofnodes/src/main/java/com/compilerprogramming/ezlang/compiler/nodes/StartNode.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ public StringBuilder _print1(StringBuilder sb, BitSet visited) {
5151
@Override public Node idealize() { return null; }
5252

5353
// No immediate dominator, and idepth==0
54-
@Override public int idepth() { return 0; }
54+
@Override public int idepth() { return CodeGen.CODE.iDepthAt(0); }
5555
@Override public CFGNode idom(Node dep) { return null; }
5656

5757
}

seaofnodes/src/main/java/com/compilerprogramming/ezlang/compiler/nodes/StoreNode.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,11 @@ public Node idealize() {
8686
}
8787
}
8888

89+
// Store of zero after alloc
90+
if( mem() instanceof ProjNode prj && prj.in(0) instanceof NewNode &&
91+
prj.in(0)==ptr().in(0) && // Same NewNode memory & pointer
92+
(val()._type==SONTypeInteger.ZERO || val()._type==SONType.NIL ) )
93+
return mem();
8994

9095
return null;
9196
}

seaofnodes/src/main/java/com/compilerprogramming/ezlang/compiler/nodes/cpus/arm/CallARM.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ public class CallARM extends CallNode implements MachNode, RIPRelSize {
2121
@Override public String label() { return op(); }
2222
@Override public String name() { return _name; }
2323
@Override public SONTypeFunPtr tfp() { return _tfp; }
24-
@Override public RegMask regmap(int i) { return arm.callInMask(_tfp,i); }
24+
@Override public RegMask regmap(int i) { return arm.callInMask(_tfp,i,fun()._maxArgSlot); }
2525
@Override public RegMask outregmap() { return null; }
2626

2727
@Override public void encoding( Encoding enc ) {

seaofnodes/src/main/java/com/compilerprogramming/ezlang/compiler/nodes/cpus/arm/CallRRARM.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ public class CallRRARM extends CallNode implements MachNode {
1111
@Override public RegMask regmap(int i) {
1212
return i==_inputs._len
1313
? arm.RMASK // Function call target
14-
: arm.callInMask(tfp(),i); // Normal argument
14+
: arm.callInMask(tfp(),i,fun()._maxArgSlot); // Normal argument
1515
}
1616
@Override public RegMask outregmap() { return null; }
1717

seaofnodes/src/main/java/com/compilerprogramming/ezlang/compiler/nodes/cpus/arm/ParmARM.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ public class ParmARM extends ParmNode implements MachNode {
99
final RegMask _rmask;
1010
ParmARM(ParmNode parm) {
1111
super(parm);
12-
_rmask = arm.callInMask(fun().sig(),_idx);
12+
_rmask = arm.callInMask(fun().sig(),_idx,0);
1313
}
1414
@Override public RegMask regmap(int i) { return null; }
1515
@Override public RegMask outregmap() { return _rmask; }

0 commit comments

Comments
 (0)