forked from googleprojectzero/fuzzilli
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathWasmLifter.swift
1567 lines (1383 loc) · 69.4 KB
/
WasmLifter.swift
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import Foundation
/// Section identifiers of the Wasm binary format.
/// The raw value of each case is the id byte that the section builders write
/// in front of the corresponding section.
private enum WasmSection: UInt8 {
    case custom = 0
    case type = 1
    case `import` = 2
    case function = 3
    case table = 4
    case memory = 5
    case global = 6
    case export = 7
    case start = 8
    case element = 9
    case code = 10
    case data = 11
    case datacount = 12
    case tag = 13
}
// Binary encoding of every ILType that the lifter may have to emit as a value type.
// Besides the Wasm types themselves, a few JS types are mapped onto the encoding
// of a Wasm type.
private let ILTypeMapping: [ILType: Data] = [
    // Wasm value types.
    .wasmi32:       Data([0x7f]),
    .wasmi64:       Data([0x7e]),
    .wasmf32:       Data([0x7d]),
    .wasmf64:       Data([0x7c]),
    .wasmExternRef: Data([0x6f]),
    .wasmFuncRef:   Data([0x70]),
    .wasmSimd128:   Data([0x7b]),
    // JS types, encoded like the Wasm type named in the trailing comment.
    .bigint:        Data([0x7e]),  // Maps to .wasmi64
    .anything:      Data([0x6f]),  // Maps to .wasmExternRef
    .integer:       Data([0x7f]),  // Maps to .wasmi32
    .number:        Data([0x7d]),  // Maps to .wasmf32
]
/// This is the main compiler for Wasm instructions.
/// This lifter collects all Wasm instructions during lifting
/// (the JavaScriptLifter passes them to this instance) and then compiles them
/// at the end of the block, when we see an EndWasmModule instruction.
/// This way the WasmLifter has full information before it actually emits any bytes.
public class WasmLifter {
/// This enum describes various failure cases that might arise from mutation
/// in the JS part of the sample, which can invalidate some of the Wasm code.
/// These are thrown by `lift` and the section builders instead of crashing.
public enum CompileError: Error {
/// An imported variable has a type that cannot be imported into a Wasm module, see `buildImportSection`.
case unknownImportType
/// The lookup of an index in one of the index spaces failed, see `resolveIdx`.
case failedIndexLookUp
/// The signature was never registered, see `getSignatureIndex`.
case failedSignatureLookUp
/// The branch target is invalid.
case invalidBranch
/// Type information is not available where we need it.
case missingTypeInformation
/// A variable could not be found during import linking.
case failedRetrieval
}
// Logger of this lifter, labelled "WasmLifter".
private var logger = Logger(withLabel: "WasmLifter")
// The actual bytecode we compile. The `build*` section methods append to this buffer.
private var bytecode: Data = Data()
// Whether to print debug output (e.g. the raw bytes of each section) while lifting.
private var verbose: Bool = false
// The string that is given to the script writer.
private var out: String = ""
// This typer holds information from the "outside" JS world.
// It is created during lifting of JavaScript and the JavaScriptLifter passes it into the WasmLifter.
private var typer: JSTyper
// This contains the instructions that we need to lift; filled through `addInstruction`.
private var instructionBuffer: Code = Code()
// TODO(cffsmith): we could do some checking here that the function is actually defined, at that point it would not be static anymore though.
/// Returns the export name of the function with the given index, i.e. "w" followed by the index.
public static func nameOfFunction(_ idx: Int) -> String {
    "w" + String(idx)
}
// TODO(cffsmith): we could do some checking here that the global is actually defined, at that point it would not be static anymore though.
/// Returns the export name of the global with the given index, i.e. "wg" followed by the index.
public static func nameOfGlobal(_ idx: Int) -> String {
    "wg" + String(idx)
}
/// Returns the export name of the table with the given index, i.e. "wt" followed by the index.
public static func nameOfTable(_ idx: Int) -> String {
    "wt" + String(idx)
}
// This contains imports, i.e. WasmJsCall arguments, tables, globals and memories that are not defined in this module.
// We need to track them here so that we can properly wire up the imports when lifting this module.
// The Signature is only valid if the Variable is the argument to a WasmJsCall instruction, in which case it is the Signature contained in that instruction.
// That Signature is a loose approximation of the JS Signature; it depends on the Wasm types that were available when it was generated.
private var imports: [(Variable, Signature?)] = []
// This tracks instructions that define globals in this module. We track the instruction because all the information, including the actual value for initialization, is stored in the Operation instead of the Variable.
private var globals: [Instruction] = []
// This tracks instructions that define memories in this module. We track the instruction here as the limits are also encoded in the Operation.
private var memories: [Instruction] = []
// This tracks instructions that define tables in this module. We track the instruction here as the table type and its limits are encoded in the Operation.
private var tables: [Instruction] = []
// The tags associated with this module, mapped to their parameter lists.
private var tags: VariableMap<ParameterList> = VariableMap()
// The function index space: the index of the first function defined in this module.
// It is incremented for every imported function in `buildImportSection`.
private var functionIdxBase = 0
// The signature index space: `signatures` holds the signatures in index order,
// `signatureIndexMap` is the reverse mapping. Both are maintained by `registerSignature`.
private var signatures : [Signature] = []
private var signatureIndexMap : [Signature: Int] = [:]
// This should only be set once we have preprocessed all imported globals, so that we know where internally defined globals start.
private var baseDefinedGlobals: Int? = nil
// This should only be set once we have preprocessed all imported tables, so that we know where internally defined tables start.
private var baseDefinedTables: Int? = nil
// This tracks the order in which we have seen globals. It can probably be unified with the .globals and .imports properties, as they should match in their keys.
private var globalOrder: [Variable] = []
/// Creates a lifter that uses the given typer to look up the types of variables.
/// - Parameter typer: The typer that was built up while lifting the surrounding JavaScript.
public init(withTyper typer: JSTyper) {
self.typer = typer
}
/// Keeps track of the bytecode expression that belongs to each variable and of
/// the variables whose expression has already been requested.
private class WasmExprWriter {
    // Maps each variable to its bytecode.
    // When we see an instruction we can store its bytes here, and once the variable shows up as an input
    // we can emit them. The bytes are either the raw bytecode that constructs the value, e.g. const.i64,
    // or a load of a local variable if the instruction has multiple inputs.
    private var varMap: VariableMap<Data> = VariableMap()

    // Variables whose expression has been requested at least once.
    private var emittedVariables: Set<Variable> = []

    /// Stores `bytecode` as the expression for `variable`, replacing any previous one.
    func addExpr(for variable: Variable, bytecode: Data) {
        varMap[variable] = bytecode
    }

    /// Returns the expression stored for `variable`, if any.
    /// The variable is recorded as emitted even when nothing is stored for it.
    func getExpr(for variable: Variable) -> Data? {
        emittedVariables.insert(variable)
        return varMap[variable]
    }

    /// Returns all variables that have an expression but were never requested through `getExpr`.
    // TODO: this does not preserve order?
    func getPendingVars() -> [Variable] {
        var pending: [Variable] = []
        for (variable, _) in varMap where !emittedVariables.contains(variable) {
            pending.append(variable)
        }
        return pending
    }

    /// Whether no expression has been stored.
    public var isEmpty: Bool {
        varMap.isEmpty
    }
}
// Stores the bytecode expressions of variables, e.g. the local loads of function parameters (see `FunctionInfo.init`).
private var writer = WasmExprWriter()
/// Whether this lifter holds no buffered instructions, no emitted bytecode and no lifted functions.
var isEmpty: Bool {
    guard instructionBuffer.isEmpty, bytecode.isEmpty else {
        return false
    }
    return functions.isEmpty
}
// TODO: maybe we can do some analysis based on blocks.
// Get the type index or something or infer the value type that it tries to return? With BlockInfo class or something like that?
/// Feeds `wasmInstruction` to the variable analyzer of the function that is currently being lifted.
private func updateVariableAnalysis(forInstruction wasmInstruction: Instruction) {
    // Instructions are only analyzed while we are inside a function definition.
    guard let currentFunction else { return }
    // The Begin instruction which opened the current function is not analyzed.
    // TODO: can this be done more neatly? i.e. re-order analysis and emitting the instruction?
    guard !(wasmInstruction.op is BeginWasmFunction) else { return }
    currentFunction.variableAnalyzer.analyze(wasmInstruction)
}
// Holds various information for the functions in a wasm module.
private class FunctionInfo {
    /// The signature of the function; its parameters occupy the first local slots.
    var signature: Signature
    /// The bytecode of the function body emitted so far.
    var code: Data
    var outputVariable: Variable? = nil
    /// All locals of the function in slot order: first the parameters, then the locals that we spill to.
    /// The position in this array is the local index used by local.get / local.set.
    var localsInfo: [(Variable, ILType)]
    var variableAnalyzer = VariableAnalyzer()
    /// The lifter this function belongs to. Weak, presumably because the lifter itself stores the FunctionInfo objects.
    weak var lifter: WasmLifter?
    // Tracks the labels and the branch depth they've been emitted at. This is needed to calculate how far "out" we have to branch to.
    // Whenever we start something that emits a label, we need to track the branch depth here.
    // This should be local to a function.
    public var labelBranchDepthMapping: VariableMap<Int> = VariableMap()

    /// Creates the info for a function and registers a local.get expression for each of its parameters.
    ///
    /// - Parameters:
    ///   - signature: The signature of the function. All parameters must be `.plain`.
    ///   - code: The initial bytecode of the function body.
    ///   - lifter: The lifter that owns this function.
    ///   - arguments: The variables of the innerOutputs; they map directly to the local indices and
    ///     must match the parameters of the signature in number.
    init(_ signature: Signature, _ code: Data, for lifter: WasmLifter, withArguments arguments: [Variable]) {
        self.signature = signature
        self.code = code
        self.localsInfo = [(Variable, ILType)]()
        self.lifter = lifter
        assert(signature.parameters.count == arguments.count)
        // Populate the localsInfo with the parameter types.
        for (idx, argVar) in arguments.enumerated() {
            switch signature.parameters[idx] {
            case .plain(let argType):
                self.localsInfo.append((argVar, argType))
                // Emit the expressions for the parameters such that we can access them if we need them.
                lifter.writer.addExpr(for: argVar, bytecode: FunctionInfo.localAccess(opcode: 0x20, slot: self.localsInfo.count - 1))
            default:
                fatalError("Cannot have a non-plain argument as a function parameter")
            }
        }
    }

    /// Encodes a local access instruction: the opcode followed by the local index.
    /// The index is a LEB128-encoded unsigned integer and not a single raw byte, so that
    /// indices of 128 and above are encoded correctly (and indices above 255 do not trap).
    private static func localAccess(opcode: UInt8, slot: Int) -> Data {
        return Data([opcode]) + Leb128.unsignedEncode(slot)
    }

    /// Appends `code` to the body of this function.
    func appendToCode(_ code: Data) {
        self.code += code
    }

    /// Allocates a new local for `variable` and emits a local.set that stores into it.
    func spillLocal(forVariable variable: Variable) {
        let type = lifter!.typer.type(of: variable)
        assert(type.Is(.wasmPrimitive))
        localsInfo.append((variable, type))
        // Do a local.set on the stack slot.
        code += FunctionInfo.localAccess(opcode: 0x21, slot: localsInfo.count - 1)
    }

    /// Whether `variable` is a parameter or a spilled local of this function.
    func isLocal(_ variable: Variable) -> Bool {
        return localsInfo.contains(where: { $0.0 == variable })
    }

    /// Returns the local index of `variable`, or nil if it is not a local of this function.
    func getStackSlot(for variable: Variable) -> Int? {
        return localsInfo.firstIndex(where: { $0.0 == variable })
    }

    /// Emits a local.get for `variable`. This is designed for the arguments of functions (innerOutputs);
    /// the variable must already be a local of this function.
    func addStackLoad(for variable: Variable) {
        guard let slot = getStackSlot(for: variable) else {
            fatalError("Cannot load \(variable): it is not a local of this function")
        }
        // This emits a local.get for the function argument.
        code += FunctionInfo.localAccess(opcode: 0x20, slot: slot)
    }
}
// The parameters, actual bytecode and number of locals of the functions defined in this module.
private var functions: [FunctionInfo] = []
// The function whose instructions are currently being lifted; nil while we are outside of a function definition.
private var currentFunction: FunctionInfo? = nil
/// Buffers `instruction` so that it is compiled by the next call to `lift`.
public func addInstruction(_ instruction: Instruction) {
    instructionBuffer.append(instruction)
}
/// Compiles all buffered instructions into the bytes of a Wasm module.
///
/// Lifting happens in three stages:
/// 1. Collect all information needed to build the sections later on. For now this is only the
///    import analysis, which needs to know how many imported vs. internally defined types exist.
/// 2. Lift each instruction within its local context, using the information from stage 1.
/// 3. Use the already lifted functions to emit the whole Wasm byte buffer.
///
/// - Parameter path: If given, the module is additionally written to this path for debugging
///   purposes. Errors while writing the file are ignored.
/// - Returns: The bytes of the module and the imported variables in import order.
/// - Throws: Whatever the import analysis, the byte emission or the section builders throw.
public func lift(binaryOutPath path: String? = nil) throws -> (Data, [Variable]) {
    //
    // Stage 1: import analysis.
    //
    // We look at all instructions that can potentially import a variable that originated
    // outside of the Wasm module, so that the imports can be wired up correctly later.
    try importAnalysis()
    // Todo: maybe add a def-use pass here to figure out where we need stack spills etc? e.g. if we have one use, we can omit the stack spill

    //
    // Stage 2: lift each instruction individually into a byte buffer.
    //
    // This happens sequentially, so all required information has to be available at this point.
    // This also collects function/type/signature information.
    for instruction in instructionBuffer {
        let needsByteEmission = updateLifterState(wasmInstruction: instruction)
        // TODO: Check if we are in a .wasmFunction context and if so, update variableAnalysis.
        updateVariableAnalysis(forInstruction: instruction)
        guard needsByteEmission else { continue }

        // If the instruction requires inputs, we probably need to emit them now: either inline the
        // corresponding instruction, iff this is a single use, or load the stack slot or the variable.
        // TODO: not all of this is implemented.
        emitInputLoadsIfNecessary(forInstruction: instruction)
        // Emit the bytes of the instruction itself into the byte array of its function.
        try emitBytesForInstruction(forInstruction: instruction)
        // If the instruction produces outputs, we might need to explicitly spill them to the stack.
        emitStackSpillsIfNecessary(forInstruction: instruction)
    }
    // All functions should have associated byte code at this point.

    //
    // Stage 3: lift the whole module and put everything together.
    //
    if verbose {
        print("Got the following functions")
        for function in functions {
            print(String(describing: function))
        }
    }

    // The header includes the magic and the Wasm version and has to come first.
    buildHeader()
    buildTypeSection()
    try buildImportSection()
    try buildFunctionSection()
    buildTableSection()
    buildMemorySection()
    try buildTagSection()
    try buildGlobalSection()
    // Export all functions by default.
    try buildExportedSection()
    // Build element segments for defined tables.
    try buildElementSection()
    // The actual bytecode of the functions.
    buildCodeSection(instructionBuffer)

    // Write the bytecode as file to the given path for debugging purposes.
    if let path {
        try? bytecode.write(to: URL(fileURLWithPath: path))
    }

    let importedVariables = imports.map { $0.0 }
    return (bytecode, importedVariables)
}
/// Appends the module preamble to the bytecode: the magic followed by the version.
private func buildHeader() {
    // The magic: a zero byte followed by "asm" in ASCII.
    bytecode += [0x00, 0x61, 0x73, 0x6d]
    // LE encoded 1 as the wasm version.
    bytecode += [0x01, 0x00, 0x00, 0x00]
}
/// Registers the signatures of all imports, tags and functions and appends the type section,
/// which contains one function type per registered signature.
private func buildTypeSection() {
    bytecode += [WasmSection.type.rawValue]

    // Collect all signatures: imports that carry one, tags (which have no output) and functions.
    for case (_, let signature?) in imports {
        registerSignature(signature)
    }
    for tag in tags {
        registerSignature(tag.1 => .nothing)
    }
    for function in functions {
        registerSignature(function.signature)
    }

    var section = Data()
    section += Leb128.unsignedEncode(signatures.count)
    for signature in signatures {
        // Every entry starts with the 0x60 marker, followed by the parameter and the result types.
        section += [0x60]
        section += Leb128.unsignedEncode(signature.parameters.count)
        for parameter in signature.parameters {
            guard case .plain(let parameterType) = parameter else {
                fatalError("unreachable")
            }
            section += ILTypeMapping[parameterType]!
        }
        if signature.outputType == .nothing {
            section += [0x00] // num output types
        } else {
            section += Leb128.unsignedEncode(1) // num output types
            // Output types without a mapping fall back to 0x6f, the encoding that ILTypeMapping uses for .wasmExternRef.
            section += ILTypeMapping[signature.outputType] ?? Data([0x6f])
        }
    }

    if verbose {
        print("Type section is")
        section.forEach { print(String(format: "%02X ", $0)) }
    }

    // Append the length of the section and the section contents itself.
    bytecode.append(Leb128.unsignedEncode(section.count))
    bytecode.append(section)
}
/// Assigns the next free type index to `signature` unless it already has one.
private func registerSignature(_ signature: Signature) {
    assert(signatures.count == signatureIndexMap.count)
    guard signatureIndexMap[signature] == nil else { return }
    // The new index is the position at which the signature is appended.
    signatureIndexMap[signature] = signatures.count
    signatures.append(signature)
    assert(signatures.count == signatureIndexMap.count)
}
/// Returns the type index that `registerSignature` assigned to `signature`.
/// - Throws: `CompileError.failedSignatureLookUp` if the signature was never registered.
private func getSignatureIndex(_ signature: Signature) throws -> Int {
    guard let index = signatureIndexMap[signature] else {
        throw WasmLifter.CompileError.failedSignatureLookUp
    }
    return index
}
/// Appends the import section, with one entry per element of `imports`. The section is omitted
/// if there are no imports.
///
/// Every import lives in the module "imports" and is named "import_<index>_<variable>". The kind of
/// the import (function, memory, table, global or tag) is derived from the type that the typer
/// reports for the imported variable. `functionIdxBase` is incremented for every imported function.
///
/// - Throws: `CompileError.unknownImportType` if a variable has a type that cannot be imported, and
///   whatever `getSignatureIndex` throws for function and tag imports.
private func buildImportSection() throws {
    guard !imports.isEmpty else { return }

    bytecode += [WasmSection.import.rawValue]
    var temp = Data()
    temp += Leb128.unsignedEncode(imports.count)

    // Names are encoded as a byte vector, so the length prefix is the number of UTF-8 bytes,
    // not the number of Characters.
    let moduleName = Data("imports".utf8)

    // Build the import components of this vector that consist of mod:name, nm:name, and d:importdesc
    for (idx, (importVariable, signature)) in imports.enumerated() {
        if verbose {
            print(importVariable)
        }
        // Append the module name and the import name, each as a vector.
        temp += Leb128.unsignedEncode(moduleName.count)
        temp += moduleName
        let importName = Data("import_\(idx)_\(importVariable)".utf8)
        temp += Leb128.unsignedEncode(importName.count)
        temp += importName

        let type = typer.type(of: importVariable)

        // This is a temporary workaround for functions that have been marked as suspendable.
        if type.Is(.function()) || type.Is(.object(ofGroup: "WebAssembly.SuspendableObject")) {
            if verbose {
                print(functionIdxBase)
            }
            let signatureIndex = try getSignatureIndex(signature!)
            temp += [0x0] + Leb128.unsignedEncode(signatureIndex)
            // Update the index space, these indices have to be set before the exports are set
            functionIdxBase += 1
            continue
        }

        if type.Is(.object(ofGroup: "WasmMemory")) {
            // Emit import type.
            temp += Data([0x2])
            let mem = type.wasmMemoryType!
            // Flags: bit 2 for memory64, bit 1 for shared, bit 0 if a maximum is present.
            let limits_byte: UInt8 = (mem.isMemory64 ? 4 : 0) | (mem.isShared ? 2 : 0) | (mem.limits.max != nil ? 1 : 0)
            temp += Data([limits_byte])
            temp += Data(Leb128.unsignedEncode(mem.limits.min))
            if let maxPages = mem.limits.max {
                temp += Data(Leb128.unsignedEncode(maxPages))
            }
            continue
        }

        if type.Is(.object(ofGroup: "WasmTable")) {
            let tableType = type.wasmTableType!.elementType
            assert(tableType == ILType.wasmExternRef)
            let minSize = type.wasmTableType!.limits.min
            let maxSize = type.wasmTableType!.limits.max
            temp += Data([0x1])
            temp += ILTypeMapping[tableType]!
            // Limits: a flag that says whether a maximum is present, the minimum and optionally the maximum.
            if let maxSize = maxSize {
                temp += Data([0x1] + Leb128.unsignedEncode(minSize) + Leb128.unsignedEncode(maxSize))
            } else {
                temp += Data([0x0] + Leb128.unsignedEncode(minSize))
            }
            continue
        }

        if type.Is(.object(ofGroup: "WasmGlobal")) {
            let valueType = type.wasmGlobalType!.valueType
            let mutability = type.wasmGlobalType!.isMutable
            temp += [0x3]
            temp += ILTypeMapping[valueType]!
            temp += mutability ? [0x1] : [0x0]
            continue
        }

        if type.Is(.object(ofGroup: "WasmTag")) {
            let signatureIndex = try getSignatureIndex(signature!)
            temp += [0x4, 0x0] + Leb128.unsignedEncode(signatureIndex)
            continue
        }

        throw WasmLifter.CompileError.unknownImportType
    }

    // Append the length of the section and the section contents itself.
    bytecode.append(Leb128.unsignedEncode(temp.count))
    bytecode.append(temp)

    if verbose {
        print("import section is")
        for byte in temp {
            print(String(format: "%02X ", byte))
        }
    }
}
/// Appends the function section: the type index of every function defined in this module.
/// - Throws: Whatever `getSignatureIndex` throws if the signature of a function is not registered.
private func buildFunctionSection() throws {
    bytecode += [WasmSection.function.rawValue]

    // The number of functions we have, as this is a vector of type idxs.
    // TODO(cffsmith): functions can share type indices. This could be an optimization later on.
    var section = Leb128.unsignedEncode(functions.count)
    for function in functions {
        let typeIndex = try getSignatureIndex(function.signature)
        section.append(Leb128.unsignedEncode(typeIndex))
    }

    // Append the length of the section and the section contents itself.
    bytecode.append(Leb128.unsignedEncode(section.count))
    bytecode.append(section)

    if verbose {
        print("function section is")
        section.forEach { print(String(format: "%02X ", $0)) }
    }
}
/// Appends the table section: the element type and the limits of every table defined in this module.
private func buildTableSection() {
    bytecode += [WasmSection.table.rawValue]

    var section = Leb128.unsignedEncode(tables.count)
    for instruction in tables {
        let tableType = (instruction.op as! WasmDefineTable).tableType
        let limits = tableType.limits
        section += ILTypeMapping[tableType.elementType]!
        // Limits: a flag that says whether a maximum is present, the minimum and optionally the maximum.
        if limits.max == nil {
            section += Data([0x0] + Leb128.unsignedEncode(limits.min))
        } else {
            section += Data([0x1] + Leb128.unsignedEncode(limits.min) + Leb128.unsignedEncode(limits.max!))
        }
    }

    // Append the length of the section and the section contents itself.
    bytecode.append(Leb128.unsignedEncode(section.count))
    bytecode.append(section)

    if verbose {
        print("table section is")
        section.forEach { print(String(format: "%02X ", $0)) }
    }
}
// Only supports:
// - active segments
// - with custom table id
// - function-indices-as-elements (i.e. case 2 of the spec: https://webassembly.github.io/spec/core/binary/modules.html#element-section)
// - one segment per table (assumes entries are continuous)
// - constant starting index.
/// Appends the element section, with one segment for every defined table that has entries.
/// - Throws: Whatever `resolveIdx` throws if a table or function index cannot be resolved.
private func buildElementSection() throws {
    bytecode += [WasmSection.element.rawValue]
    var section = Data()

    // Element segment count: only tables with at least one defined entry get a segment.
    let tablesWithEntries = tables.filter {
        !($0.op as! WasmDefineTable).definedEntryIndices.isEmpty
    }
    section += Leb128.unsignedEncode(tablesWithEntries.count)

    for instruction in tables {
        let definedEntryIndices = (instruction.op as! WasmDefineTable).definedEntryIndices
        assert(definedEntryIndices.count == instruction.inputs.count)
        guard !definedEntryIndices.isEmpty else { continue }

        // Element segment case 2 definition.
        section += [0x02]
        section += Leb128.unsignedEncode(try resolveIdx(ofType: .table, for: instruction.output))

        // Starting index as a constant expression. Assumes all entries are continuous.
        // NOTE(review): 0x41 is i32.const, whose operand is a *signed* LEB128 in the binary format,
        // but the index is encoded unsigned here. The two encodings differ for some values
        // (e.g. 64...127) — confirm whether a signed encoder should be used.
        section += [0x41]
        section += Leb128.unsignedEncode(definedEntryIndices[0])
        section += [0x0b] // end

        // elemkind
        section += [0x00]
        // entry count
        section += Leb128.unsignedEncode(definedEntryIndices.count)
        // entries: the function index of every input of the table definition.
        for entry in instruction.inputs {
            section += Leb128.unsignedEncode(try resolveIdx(ofType: .function, for: entry))
        }
    }

    // Append the length of the section and the section contents itself.
    bytecode.append(Leb128.unsignedEncode(section.count))
    bytecode.append(section)

    if verbose {
        print("element section is")
        section.forEach { print(String(format: "%02X ", $0)) }
    }
}
/// Appends the code section: for every function the declaration of its non-parameter locals,
/// followed by its bytecode and the end marker.
/// - Parameter instructions: Not used by this method.
private func buildCodeSection(_ instructions: Code) {
    bytecode += [WasmSection.code.rawValue]

    // Build the contents of the section
    var section = Data()
    section += Leb128.unsignedEncode(functions.count)

    for function in functions {
        if verbose {
            print("code is:")
            function.code.forEach { print(String(format: "%02X", $0)) }
            print("end of code")
        }

        // TODO: this should be encapsulated more nicely. There should be an interface that gets the locals without the parameters. As this is currently mainly used to get the slots info.
        let parameterCount = function.signature.parameters.count
        var body = Data()
        // Encode the number of locals. The parameters come first in localsInfo and are not declared again.
        body += Leb128.unsignedEncode(function.localsInfo.count - parameterCount)
        for local in function.localsInfo[parameterCount...] {
            // Encode the locals, each one as its own group with a count of 1.
            body += Leb128.unsignedEncode(1)
            // HINT: If you crash here, you might not have specified an encoding for your new type in `ILTypeMapping`.
            body += ILTypeMapping[local.1]!
        }

        // append the actual code and the end marker
        body += function.code
        body += [0x0b]

        // Append the function object, prefixed with its size, to the section
        section += Leb128.unsignedEncode(body.count)
        section += body
    }

    // Append the length of the section and the section contents itself.
    bytecode.append(Leb128.unsignedEncode(section.count))
    bytecode.append(section)

    if verbose {
        print("Code section is")
        section.forEach { print(String(format: "%02X ", $0)) }
    }
}
/// Appends the global section: for every global defined in this module its value type, its
/// mutability and a constant initializer expression.
/// - Throws: Whatever lifting the initializer instruction throws.
private func buildGlobalSection() throws {
    bytecode += [WasmSection.global.rawValue]

    var section = Data()
    section += Leb128.unsignedEncode(globals.count)

    // TODO: in the future this should maybe be a context that allows instructions? Such that we can fuzz this expression as well?
    for instruction in globals {
        let definition = instruction.op as! WasmDefineGlobal
        let global = definition.wasmGlobal
        section += ILTypeMapping[global.toType()]!
        section += Data([definition.isMutable ? 0x1 : 0x0])

        // The initializer has to be a constant expression: https://webassembly.github.io/spec/core/valid/instructions.html#constant-expressions
        // We build a temporary const instruction for it. Its output variable does not have a number; it only
        // exists to satisfy the instruction assertions, maybe this can be done more nicely somehow.
        let initializer: Instruction
        switch global {
        case .wasmi32(let value):
            initializer = Instruction(Consti32(value: value), output: Variable())
        case .wasmi64(let value):
            initializer = Instruction(Consti64(value: value), output: Variable())
        case .wasmf32(let value):
            initializer = Instruction(Constf32(value: value), output: Variable())
        case .wasmf64(let value):
            initializer = Instruction(Constf64(value: value), output: Variable())
        case .refNull,
             .refFunc(_),
             .imported(_):
            fatalError("unreachable")
        }
        section += try lift(initializer)
        // The end marker of the initializer expression.
        section += Data([0x0B])
    }

    // Append the length of the section and the section contents itself.
    bytecode.append(Leb128.unsignedEncode(section.count))
    bytecode.append(section)

    if verbose {
        print("global section is")
        section.forEach { print(String(format: "%02X ", $0)) }
    }
}
/// Appends the memory section: the limits of every memory defined in this module.
private func buildMemorySection() {
    bytecode += [WasmSection.memory.rawValue]

    var section = Data()
    // The amount of memories we have, per standard this can currently only be one, either defined or imported
    // https://webassembly.github.io/spec/core/syntax/modules.html#memories
    section += Leb128.unsignedEncode(memories.count)

    for instruction in memories {
        let type = typer.type(of: instruction.output)
        assert(type.isWasmMemoryType)
        let memory = type.wasmMemoryType!

        // Flags: bit 2 for memory64, bit 1 for shared, bit 0 if a maximum is present.
        var flags: UInt8 = 0
        if memory.isMemory64 { flags |= 4 }
        if memory.isShared { flags |= 2 }
        if memory.limits.max != nil { flags |= 1 }
        section += Data([flags])

        section += Data(Leb128.unsignedEncode(memory.limits.min))
        if let maxPages = memory.limits.max {
            section += Data(Leb128.unsignedEncode(maxPages))
        }
    }

    // Append the length of the section and the section contents itself.
    bytecode.append(Leb128.unsignedEncode(section.count))
    bytecode.append(section)

    if verbose {
        print("memory section is")
        section.forEach { print(String(format: "%02X ", $0)) }
    }
}
/// Appends the tag section: for every tag a zero byte followed by the index of its signature,
/// which is the parameter list of the tag with no output. The section is omitted if there are no tags.
/// - Throws: Whatever `getSignatureIndex` throws if the signature of a tag is not registered.
private func buildTagSection() throws {
    guard !tags.isEmpty else {
        return // Skip the whole section.
    }

    bytecode.append(WasmSection.tag.rawValue)

    var section = Data()
    // The number of tags, determined by counting the entries of the map.
    let tagCount = tags.reduce(0) { count, _ in count + 1 }
    section += Leb128.unsignedEncode(tagCount)
    for tag in tags {
        let signatureIndex = try getSignatureIndex(tag.1 => .nothing)
        section.append(0)
        section.append(Leb128.unsignedEncode(signatureIndex))
    }

    // Append the length of the section and the section contents itself.
    bytecode.append(Leb128.unsignedEncode(section.count))
    bytecode.append(section)

    if verbose {
        print("tag section is")
        section.forEach { print(String(format: "%02X ", $0)) }
    }
}
// Export all functions, globals (imported and defined) and defined tables by default.
/// Appends the export section.
///
/// Functions are exported as `nameOfFunction(i)`, globals as `nameOfGlobal(i)` where `i` is the
/// position of the global in `globalOrder`, and tables as `nameOfTable(i)` where `i` is the index
/// of the table. Every entry consists of the name, a kind byte and the index of the exported item;
/// the index is encoded as an unsigned LEB128 so that indices of 128 and above are valid.
///
/// - Throws: Whatever `resolveIdx` throws if the index of a table cannot be resolved.
private func buildExportedSection() throws {
    bytecode += [WasmSection.export.rawValue]
    var temp = Data()

    // Appends a single export entry to `temp`.
    func appendExport(named name: String, kind: UInt8, index: Int) {
        // Names are encoded as a byte vector, so the length prefix is the number of UTF-8 bytes.
        let nameBytes = Data(name.utf8)
        temp += Leb128.unsignedEncode(nameBytes.count)
        temp += nameBytes
        temp += [kind]
        temp += Leb128.unsignedEncode(index)
    }

    // TODO: Track the order in which globals are seen by the typer in the program builder and maybe export them by name here like they are seen.
    // This would just be a 'correctness' fix as this mismatch does not have any implications, it should be fixed though to avoid issues down the road as this is a very subtle mismatch.
    // Get the imported globals.
    let importedGlobals = imports.map({ $0.0 }).filter {
        typer.type(of: $0).Is(.object(ofGroup: "WasmGlobal"))
    }

    temp += Leb128.unsignedEncode(functions.count + importedGlobals.count + globals.count + tables.count)

    for idx in functions.indices {
        // Add the base, as our exports start after the imports. This variable needs to be incremented in the `buildImportSection` function.
        appendExport(named: WasmLifter.nameOfFunction(idx), kind: 0x0, index: functionIdxBase + idx)
    }

    // Export all globals that are imported.
    for (idx, imp) in importedGlobals.enumerated() {
        // Here for the name, we use the index as remembered by the globalOrder array, to preserve the export order with what the typer of the ProgramBuilder has seen before.
        let index = globalOrder.firstIndex(of: imp)!
        appendExport(named: WasmLifter.nameOfGlobal(index), kind: 0x3, index: idx)
    }

    // Also export all globals that we have defined.
    for (idx, instruction) in globals.enumerated() {
        // Again, the name that we export it as matches the order that the ProgramBuilder's typer has seen it when traversing the Code, which happens way before our typer here sees it, as we are typing during *lifting* of the JS code.
        // This kinda solves a problem we don't actually have... but it's correct this way :)
        let index = globalOrder.firstIndex(of: instruction.output)!
        // Defined globals come after the imported ones in the index space.
        // TODO: maybe add something like a global base?
        appendExport(named: WasmLifter.nameOfGlobal(index), kind: 0x3, index: importedGlobals.count + idx)
    }

    // Export all tables that we have defined.
    for instruction in tables {
        let index = try resolveIdx(ofType: .table, for: instruction.output)
        appendExport(named: WasmLifter.nameOfTable(index), kind: 0x1, index: index)
    }

    // TODO(mliedtke): Export defined tags.

    // Append the length of the section and the section contents itself.
    bytecode.append(Leb128.unsignedEncode(temp.count))
    bytecode.append(temp)

    if verbose {
        print("export section is")
        for byte in temp {
            print(String(format: "%02X ", byte))
        }
    }
}
/// Brings the lifter's internal bookkeeping up to date for `instr` before any code is emitted for it.
///
/// Invoke this before asking for the bytes of the instruction. Block-opening instructions register their
/// signature and map their label variables to a branch depth, function begin/end instructions switch
/// `currentFunction`, and the remaining handled instructions only sanity-check earlier bookkeeping.
///
/// - Parameter instr: The instruction to process; its operation must be a `WasmOperation`.
/// - Returns: `false` for nops, function begin/end and global/table/memory/tag definitions, `true` for
///   every other operation (the block-structure instructions below need typer analysis).
private func updateLifterState(wasmInstruction instr: Instruction) -> Bool {
    // Only Wasm operations may be passed in here.
    assert(instr.op is WasmOperation)

    // Maps the label at inner output `index` of `instr` to the current branch depth. When the instruction
    // closes the preceding block before opening its own, the label lives one level further out.
    func mapLabel(atInnerOutput index: Int, afterClosingPrecedingBlock closesBlock: Bool = false) {
        let depth = self.currentFunction!.variableAnalyzer.wasmBranchDepth
        self.currentFunction!.labelBranchDepthMapping[instr.innerOutput(index)] = closesBlock ? depth - 1 : depth
    }

    switch instr.op.opcode {
    // Block-opening instructions: register the signature and remember the label depth.
    // All of these need typer analysis.
    case .wasmBeginBlock(let op):
        // TODO(mliedtke): Repeat this for loops.
        // NOTE(review): the loop case below already registers its signature; confirm whether this TODO is stale.
        registerSignature(op.signature)
        mapLabel(atInnerOutput: 0)
        return true
    case .wasmBeginLoop(let op):
        registerSignature(op.signature)
        mapLabel(atInnerOutput: 0)
        return true
    case .wasmBeginIf(let op):
        registerSignature(op.signature)
        mapLabel(atInnerOutput: 0)
        return true
    case .wasmBeginTry(let op):
        registerSignature(op.signature)
        mapLabel(atInnerOutput: 0)
        return true
    case .wasmBeginTryDelegate(let op):
        registerSignature(op.signature)
        mapLabel(atInnerOutput: 0)
        return true

    // Instructions that continue an existing construct. For else, one is subtracted because the begin else
    // closes the if block before opening the else block; catch-all uses the same adjustment.
    case .wasmBeginElse, .wasmBeginCatchAll:
        mapLabel(atInnerOutput: 0, afterClosingPrecedingBlock: true)
        // Needs typer analysis
        return true
    case .wasmBeginCatch:
        // A catch block has two inner outputs that receive a depth, both one level further out.
        mapLabel(atInnerOutput: 0, afterClosingPrecedingBlock: true)
        mapLabel(atInnerOutput: 1, afterClosingPrecedingBlock: true)
        // The tag must be either imported or defined in this module.
        assert(self.imports.contains(where: { $0.0 == instr.input(0) }) || self.tags.contains(instr.input(0)))
        // Needs typer analysis
        return true

    // Instructions whose first input must already be known to the lifter.
    case .wasmJsCall:
        assert(self.imports.contains(where: { $0.0 == instr.input(0) }))
        return true
    case .wasmThrow:
        assert(self.imports.contains(where: { $0.0 == instr.input(0) }) || self.tags.contains(instr.input(0)))
        return true

    case .wasmNop:
        // Just analyze the instruction but do nothing else here.
        // This lets the typer know that we can skip this instruction without breaking any analysis.
        return false

    case .beginWasmFunction(let op):
        let info = FunctionInfo(op.signature, Data(), for: self, withArguments: Array(instr.innerOutputs))
        functions.append(info)
        // We are now *actively* inside this function, so make it the current one.
        currentFunction = functions.last
        return false
    case .endWasmFunction:
        // TODO: Make sure that the stack is matching the output of the function signature, at least depth wise
        // Leave the current function so that the variableAnalyzer can be reset as well: it is local to a
        // function definition and should only ever see .wasmFunction context instructions.
        currentFunction!.outputVariable = instr.output
        currentFunction = nil
        return false

    // Definitions: only verify that the output is already present in the matching collection.
    case .wasmDefineGlobal:
        assert(self.globals.contains(where: { $0.output == instr.output }))
        return false
    case .wasmDefineTable:
        assert(self.tables.contains(where: { $0.output == instr.output }))
        return false
    case .wasmDefineMemory:
        assert(self.memories.contains(where: { $0.output == instr.output }))
        return false
    case .wasmDefineTag:
        assert(self.tags.contains(instr.output))
        return false

    default:
        return true
    }
}
/// Makes the inputs of `instr` available by emitting the loads they need into the current function.
///
/// Requires that `instr` has been analyzed before. Maybe assert that?
///
/// - Parameter instr: The instruction whose inputs should be loaded.
private func emitInputLoadsIfNecessary(forInstruction instr: Instruction) {
    // Reassigns never get loads here; the `lift` function handles them specially.
    guard !(instr.op is WasmReassign) else { return }

    for input in instr.inputs {
        let inputType = typer.type(of: input)

        // Label variables are "internal" inputs that don't map to a slot, so there is nothing to load.
        let isLabel = inputType.Is(.anyLabel) || inputType.Is(.exceptionLabel)
        if isLabel { continue }

        if currentFunction!.getStackSlot(for: input) != nil {
            // The input has a stack slot, i.e. it is a local or an argument: emit the stack load now.
            currentFunction!.addStackLoad(for: input)
        } else if let expr = self.writer.getExpr(for: input) {
            // The writer has an expression for this input: emit it now.
            currentFunction!.appendToCode(expr)
        } else {
            // Special inputs that aren't locals (e.g. memories, functions, tags, ...). Nothing is emitted for
            // them; they must either be defined by this module or have been registered as an import.
            let definedInThisModule =
                (inputType.isWasmTagType && tags.contains(input))
                || (inputType.isWasmTableType && tables.contains(where: { $0.output == input }))
                || (inputType.Is(.wasmFuncRef) && functions.contains(where: { $0.outputVariable == input }))
                || (inputType.isWasmGlobalType && globals.contains(where: { $0.output == input }))
                || (inputType.isWasmMemoryType && memories.contains(where: { $0.output == input }))
            assert(definedInThisModule || self.imports.contains(where: { $0.0 == input }),
                   "Variable \(input) needs to be imported during importAnalysis()")
        }
    }
}
/// Lifts `instr` via `lift` and appends the result to the code of the current function.
///
/// - Parameter instr: The instruction to lift.
/// - Throws: Whatever `lift` throws for this instruction.
private func emitBytesForInstruction(forInstruction instr: Instruction) throws {
    // Must be called while a function is active; the force-unwrap traps otherwise.
    let function = currentFunction!
    function.appendToCode(try lift(instr))
}
private func emitStackSpillsIfNecessary(forInstruction instr: Instruction) {
// Don't emit spills for reassigns. This is specially handled in the `lift` function for reassigns.
if instr.op is WasmReassign {
return
}
// If we have an output, make sure we store it on the stack as this is a "complex" instruction, i.e. has inputs and outputs
if instr.numOutputs > 0 {
assert(!typer.type(of: instr.output).Is(.anyLabel))
// Also spill the instruction
currentFunction!.spillLocal(forVariable: instr.output)
// Add the corresponding stack load as an expression, this adds the number of arguments, as output vars always live after the function arguments.
self.writer.addExpr(for: instr.output, bytecode: Data([0x20, UInt8(currentFunction!.localsInfo.count - 1)]))
}