module WASM-DATA-SYNTAX
imports WASM-DATA-COMMON-SYNTAX
endmodule
module WASM-DATA-COMMON-SYNTAX
imports INT-SYNTAX
imports FLOAT-SYNTAX
syntax IdentifierToken [token]
syntax WasmIntToken [token]
syntax #Layout [token]
syntax WasmStringToken [token]
// ------------------------------
And we use OptionalId
to handle the case where an identifier could be omitted.
syntax Identifier ::= IdentifierToken
syntax OptionalId ::= "" [klabel(.Identifier), symbol]
| Identifier
// --------------------------------
Wasm binary data can sometimes be specified as a string. The string considered to represent the sequence of UTF-8 bytes that encode it. The exception is for characters that are explicitly escaped which can represent bytes in hexadecimal form.
syntax WasmString ::= ".WasmString"
| WasmStringToken
// -------------------------------------
syntax String ::= #parseWasmString ( WasmStringToken ) [function, functional, hook(STRING.token2string)]
// ----------------------------------------------------------------------------------------------------------
syntax DataString ::= List{WasmString, ""} [klabel(listWasmString)]
// -------------------------------------------------------------------
Many constructions in Wasm, such a functions, labels, locals, types, etc., are referred to by their index.
In the abstract syntax of Wasm, indices are 32 bit unsigned integers.
However, we extend the Index
sort with another subsort, Identifier
, in the text format.
syntax Index ::= WasmInt
// ------------------------
Element Segment is a list of indices.
It is used when initializing a WebAssembly table, or used as the parameter of the br_table
function.
syntax ElemSegment ::= List{Index, ""} [klabel(listIndex)]
// ----------------------------------------------------------
WebAssembly has four basic types, for 32 and 64 bit integers and floats.
syntax IValType ::= "i32" [klabel(i32), symbol] | "i64" [klabel(i64), symbol]
syntax FValType ::= "f32" [klabel(f32), symbol] | "f64" [klabel(f64), symbol]
syntax ValType ::= IValType | FValType
// ---------------------------------------
There are two basic type-constructors: sequencing ([_]
) and function spaces (_->_
).
syntax ValTypes ::= List{ValType, ""} [klabel(listValTypes), symbol]
// --------------------------------------------------------------------
Here we define the rules about what integer formats can be used in Wasm. For the core language, only regular integers are allowed.
syntax WasmInt ::= Int
// ----------------------
syntax Mut ::= "const" [klabel(mutConst), symbol]
| "var" [klabel(mutVar), symbol]
// -----------------------------------------------
WebAssembly values are either integers or floating-point numbers, of 32- or 64-bit widths.
syntax Number ::= Int | Float
// -----------------------------
An external value
is the runtime representation of an entity that can be imported
or exported
.
It is an address
denoting either a function instance
, table instance
, memory instance
, or global instances
in the shared store.
syntax AllocatedKind ::= "func" | "table" | "memory" | "global"
syntax Externval ::= AllocatedKind Index
// --------------------------------------------
endmodule
module WASM-DATA
imports WASM-DATA-COMMON-SYNTAX
imports INT
imports BOOL
imports STRING
imports LIST
imports MAP
imports FLOAT
imports BYTES
imports K-EQUAL
In KWASM
rules, we use #freshId ( Int )
to generate a fresh identifier based on the element index in the current module.
syntax Identifier ::= #freshId ( Int )
// --------------------------------------
In KWasm we store identifiers in maps from Identifier
to Int
, the Int
being an index.
This rule handles adding an OptionalId
as a map key, but only when it is a proper identifier.
syntax Map ::= #saveId (Map, OptionalId, Int) [function]
// -------------------------------------------------------
rule #saveId (MAP, ID:OptionalId, _) => MAP requires notBool isIdentifier(ID)
rule #saveId (MAP, ID:Identifier, IDX) => MAP [ID <- IDX]
Int
is the basic form of index, and indices always need to resolve to integers.
In the text format, Index
is extended with the Identifier
subsort, and there needs to be a way to resolve it to an Int
.
In Wasm, the current "context" contains a mapping from identifiers to indices.
The #ContextLookup
function provides an extensible way to get an Int
from an index.
Any extension of the Index
type requires that the function #ContextLookup
is suitably extended.
For Int
, however, a the context is irrelevant and the index always just resolves to the integer.
syntax Int ::= #ContextLookup ( Map , Index ) [function]
// --------------------------------------------------------
rule #ContextLookup(_IDS:Map, I:Int) => I
syntax Ints ::= List{Int, ""} [klabel(listInt), symbol]
// -------------------------------------------------------
syntax Int ::= #lenElemSegment (ElemSegment) [function]
syntax Index ::= #getElemSegment (ElemSegment, Int) [function]
syntax Int ::= #lenInts (Ints) [function]
syntax Int ::= #getInts (Ints, Int) [function]
// --------------------------------------------------------------
rule #lenElemSegment(.ElemSegment) => 0
rule #lenElemSegment(_TFIDX ES) => 1 +Int #lenElemSegment(ES)
rule #getElemSegment(E _ES, 0) => E
rule #getElemSegment(_E ES, I) => #getElemSegment(ES, I -Int 1) requires I >Int 0
rule #lenInts(.Ints) => 0
rule #lenInts(_TFIDX ES) => 1 +Int #lenInts(ES)
rule #getInts(E _ES, 0) => E
rule #getInts(_E ES, I) => #getInts(ES, I -Int 1) requires I >Int 0
syntax Ints ::= elemSegment2Ints ( ElemSegment ) [function]
// -----------------------------------------------------------
rule elemSegment2Ints(.ElemSegment) => .Ints
rule elemSegment2Ints(E:Int ES) => E elemSegment2Ints(ES)
In some cases, an integer is optional, such as when either giving or omitting the max bound when defining a table or memory.
The sort OptionalInt
provides this potentially "undefined" Int
.
syntax OptionalInt ::= Int | ".Int"
// -----------------------------------
Tables and memories have limits, defined as either a single Int
or two Int
s, representing min and max bounds.
syntax Limits ::= #limitsMin(Int) [klabel(limitsMin), symbol]
| #limits(Int, Int) [klabel(limitsMinMax), symbol]
// ------------------------------------------------------------------
There are two basic type-constructors: sequencing ([_]
) and function spaces (_->_
).
syntax VecType ::= "[" ValTypes "]" [klabel(aVecType), symbol]
// ---------------------------------------------------------------
syntax FuncType ::= VecType "->" VecType [klabel(aFuncType), symbol]
// --------------------------------------------------------------------
syntax Int ::= lengthValTypes ( ValTypes ) [function, functional]
// -----------------------------------------------------------------
rule lengthValTypes(.ValTypes) => 0
rule lengthValTypes(_V VS) => 1 +Int lengthValTypes(VS)
All told, a Type
can be a value type, vector of types, or function type.
syntax Type ::= ".Type" | ValType | VecType | FuncType
// ------------------------------------------------------
We can append two ValTypes
s with the _+_
operator.
syntax ValTypes ::= ValTypes "+" ValTypes [function, functional]
// ----------------------------------------------------------------
rule .ValTypes + VTYPES' => VTYPES'
rule (VT VTYPES) + VTYPES' => VT (VTYPES + VTYPES')
Also we can reverse a ValTypes
with #revt
syntax ValTypes ::= #revt ( ValTypes ) [function, functional]
| #revt ( ValTypes , ValTypes ) [function, functional, klabel(#revtAux)]
// ------------------------------------------------------------------------------------------
rule #revt(VT) => #revt(VT, .ValTypes)
rule #revt(.ValTypes, VT') => VT'
rule #revt(V VT , VT') => #revt(VT, V VT')
The #width
function returns the bit-width of a given IValType
.
syntax Int ::= #width ( IValType ) [function, functional]
syntax Int ::= #numBytes ( IValType ) [function, functional, smtlib(numBytes)]
// ------------------------------------------------------------------------------
rule #width(i32) => 32
rule #width(i64) => 64
rule #numBytes(ITYPE) => #width(ITYPE) /Int 8 [concrete]
2 ^Int 32
and 2 ^Int 64
are used often enough to warrant providing helpers for them.
syntax Int ::= #pow ( IValType ) [function, functional, smtlib(pow )] /* 2 ^Int #width(T) */
| #pow1 ( IValType ) [function, functional, smtlib(pow1)] /* 2 ^Int (#width(T) -Int 1) */
// ------------------------------------------------------------------------------------------------------
rule #pow1(i32) => 2147483648
rule #pow (i32) => 4294967296
rule #pow1(i64) => 9223372036854775808
rule #pow (i64) => 18446744073709551616
syntax Mut ::= ".Mut"
// ----------------------
Proper values are numbers annotated with their types.
syntax IVal ::= "<" IValType ">" Int [klabel(<_>_)]
syntax FVal ::= "<" FValType ">" Float [klabel(<_>_)]
syntax Val ::= "<" ValType ">" Number [klabel(<_>_)]
| IVal | FVal
// ---------------------------
We also add undefined
as a value, which makes many partial functions in the semantics total.
syntax Val ::= "undefined"
// --------------------------
The #chop
function will ensure that an integer value is wrapped to the correct bit-width.
The #wrap
function wraps an integer to a given byte width.
syntax IVal ::= #chop ( IVal ) [function, functional]
// -----------------------------------------------------
rule #chop(< ITYPE > N) => < ITYPE > (N modInt #pow(ITYPE))
syntax Int ::= #wrap ( Int , Int ) [function, functional]
// ---------------------------------------------------------
rule [wrap-Positive] : #wrap(WIDTH, N) => N &Int ((1 <<Int (WIDTH *Int 8)) -Int 1) requires 0 <Int WIDTH
rule #wrap(WIDTH, _N) => 0 requires notBool 0 <Int WIDTH
In K
all Float
numbers are of 64-bits width by default, so we need to downcast a f32
float to 32-bit manually.
The #round
function casts a f64
float to a f32
float.
f64
floats has 1 bit for the sign, 53 bits for the value and 11 bits for exponent.
f32
floats has 1 bit for the sign, 23 bits for the value and 8 bits for exponent.
syntax FVal ::= #round ( FValType , Number ) [function]
// -------------------------------------------------------
rule #round(f64 , N:Float) => < f64 > roundFloat(N, 53, 11) [concrete]
rule #round(f32 , N:Float) => < f32 > roundFloat(N, 24, 8) [concrete]
rule #round(f64 , N:Int ) => < f64 > Int2Float(N, 53, 11) [concrete]
rule #round(f32 , N:Int ) => < f32 > Int2Float(N, 24, 8) [concrete]
Functions #signed
and #unsigned
allow for easier operation on twos-complement numbers.
These functions assume that the argument integer is in the valid range of signed and unsigned values of the respective type, so they will not correctly map arbitrary integers into the corret range.
Some operations extend integers from 1, 2, or 4 bytes, so a special function with a bit width argument helps with the conversion.
syntax Int ::= #signed ( IValType , Int ) [function]
| #unsigned ( IValType , Int ) [function]
| #signedWidth ( Int , Int ) [function]
// ---------------------------------------------------------
rule #signed(ITYPE, N) => N requires 0 <=Int N andBool N <Int #pow1(ITYPE)
rule #signed(ITYPE, N) => N -Int #pow(ITYPE) requires #pow1(ITYPE) <=Int N andBool N <Int #pow (ITYPE)
rule #unsigned( ITYPE, N) => N +Int #pow(ITYPE) requires N <Int 0
rule #unsigned(_ITYPE, N) => N requires 0 <=Int N
rule #signedWidth(1, N) => N requires 0 <=Int N andBool N <Int 128
rule #signedWidth(1, N) => N -Int 256 requires 128 <=Int N andBool N <Int 256
rule #signedWidth(2, N) => N requires 0 <=Int N andBool N <Int 32768
rule #signedWidth(2, N) => N -Int 65536 requires 32768 <=Int N andBool N <Int 65536
rule #signedWidth(4, N) => #signed(i32, N)
Function #bool
converts a Bool
into an Int
.
syntax Int ::= #bool ( Bool ) [function, functional]
// ----------------------------------------------------
rule #bool( B:Bool ) => 1 requires B
rule #bool( B:Bool ) => 0 requires notBool B
WebAssembly is a stack-machine, so here we provide the stack to operate over.
Operator _++_
implements an append operator for sort ValStack
.
syntax ValStack ::= ".ValStack"
| Val ":" ValStack
| ValStack "++" ValStack [function, functional]
// -----------------------------------------------------------------
rule .ValStack ++ VALSTACK' => VALSTACK'
rule (SI : VALSTACK) ++ VALSTACK' => SI : (VALSTACK ++ VALSTACK')
#zero
will create a specified stack of zero values in a given type.
#take
will take the prefix of a given stack.
#drop
will drop the prefix of a given stack.
#revs
will reverse a given stack.
NOTE: #take
and #drop
are total, so in case they could not take/drop enough values before running out, they just return the empty .ValStack
.
Each call site must ensure that this is desired behavior before using these functions.
syntax ValStack ::= #zero ( ValTypes ) [function, functional]
| #take ( Int , ValStack ) [function, functional]
| #drop ( Int , ValStack ) [function, functional]
| #revs ( ValStack ) [function, functional]
| #revs ( ValStack , ValStack ) [function, functional, klabel(#revsAux)]
// ------------------------------------------------------------------------------------------
rule #zero(.ValTypes) => .ValStack
rule #zero(ITYPE:IValType VTYPES) => < ITYPE > 0 : #zero(VTYPES)
rule #zero(FTYPE:FValType VTYPES) => < FTYPE > 0.0 : #zero(VTYPES)
rule #take(N, _) => .ValStack requires notBool N >Int 0
rule #take(N, .ValStack) => .ValStack requires N >Int 0
rule #take(N, V : VS) => V : #take(N -Int 1, VS) requires N >Int 0
rule #drop(N, VS) => VS requires notBool N >Int 0
rule #drop(N, .ValStack) => .ValStack requires N >Int 0
rule #drop(N, _ : VS) => #drop(N -Int 1, VS) requires N >Int 0
rule #revs(VS) => #revs(VS, .ValStack)
rule #revs(.ValStack, VS') => VS'
rule #revs(V : VS , VS') => #revs(VS, V : VS')
Wasm uses a different character escape rule with K, so we need to define the unescape
function ourselves.
syntax String ::= unescape(String) [function]
| unescape(String, Int, String) [function, klabel(unescapeAux)]
// -------------------------------------------------------------------------------
rule unescape(S ) => unescape(S, 1, "")
rule unescape(S, IDX, SB) => SB requires IDX ==Int lengthString(S) -Int 1
Unescaped characters just directly gets added to the output. The escaped character starts with a "" and followed by 2 hexdigits will be converted to a unescaped character before stored.
rule unescape(S, IDX, SB) => unescape(S, IDX +Int 1, SB +String substrString(S, IDX, IDX +Int 1))
requires IDX <Int lengthString(S) -Int 1
andBool substrString(S, IDX, IDX +Int 1) =/=K "\\"
rule unescape(S, IDX, SB) => unescape(S, IDX +Int 3, SB +String chrChar(String2Base(substrString(S, IDX +Int 1, IDX +Int 3), 16)))
requires IDX <Int lengthString(S) -Int 3
andBool substrString(S, IDX, IDX +Int 1) ==K "\\"
andBool (findChar("0123456789abcdefABCDEF", substrString(S, IDX +Int 1, IDX +Int 2), 0) =/=Int -1 )
The characters "\t", "\n", "\r", """, "'", and "" are interpreted as regular escaped ascii symbols.
rule unescape(S, IDX, SB) => unescape(S, IDX +Int 2, SB +String chrChar(String2Base("09", 16)))
requires IDX <Int lengthString(S) -Int 2
andBool substrString(S, IDX, IDX +Int 1) ==K "\\"
andBool substrString(S, IDX +Int 1, IDX +Int 2) ==K "t"
rule unescape(S, IDX, SB) => unescape(S, IDX +Int 2, SB +String chrChar(String2Base("0A", 16)))
requires IDX <Int lengthString(S) -Int 2
andBool substrString(S, IDX, IDX +Int 1) ==K "\\"
andBool substrString(S, IDX +Int 1, IDX +Int 2) ==K "n"
rule unescape(S, IDX, SB) => unescape(S, IDX +Int 2, SB +String chrChar(String2Base("0D", 16)))
requires IDX <Int lengthString(S) -Int 2
andBool substrString(S, IDX, IDX +Int 1) ==K "\\"
andBool substrString(S, IDX +Int 1, IDX +Int 2) ==K "r"
rule unescape(S, IDX, SB) => unescape(S, IDX +Int 2, SB +String chrChar(String2Base("22", 16)))
requires IDX <Int lengthString(S) -Int 2
andBool substrString(S, IDX, IDX +Int 1) ==K "\\"
andBool substrString(S, IDX +Int 1, IDX +Int 2) ==K "\""
rule unescape(S, IDX, SB) => unescape(S, IDX +Int 2, SB +String chrChar(String2Base("27", 16)))
requires IDX <Int lengthString(S) -Int 2
andBool substrString(S, IDX, IDX +Int 1) ==K "\\"
andBool substrString(S, IDX +Int 1, IDX +Int 2) ==K "'"
rule unescape(S, IDX, SB) => unescape(S, IDX +Int 2, SB +String chrChar(String2Base("5C", 16)))
requires IDX <Int lengthString(S) -Int 2
andBool substrString(S, IDX, IDX +Int 1) ==K "\\"
andBool substrString(S, IDX +Int 1, IDX +Int 2) ==K "\\"
Longer byte sequences can be encoded as escaped "Unicode", with \u{<hexdigit>+}
.
The implementation is not correct for now because the UTF-8 encoding is not implemented.
syntax Int ::= #idxCloseBracket ( String, Int ) [function]
// ----------------------------------------------------------
rule #idxCloseBracket ( S, I ) => I requires substrString(S, I, I +Int 1) ==String "}"
rule #idxCloseBracket ( S, I ) => #idxCloseBracket ( S, I +Int 1 ) requires substrString(S, I, I +Int 1) =/=String "}"
syntax Bytes ::= #encodeUTF8 ( Int ) [function]
// -----------------------------------------------
rule #encodeUTF8 (I) => Int2Bytes(I, BE, Unsigned) requires I <=Int 127
rule #encodeUTF8 (I) => Int2Bytes(((((I &Int 1984) >>Int 6) +Int 192) <<Int 8) +Int ((I &Int 63) +Int 128), BE, Unsigned)
requires I >=Int 128 andBool I <=Int 2047
rule #encodeUTF8 (I) => Int2Bytes(((((I &Int 61440) >>Int 12) +Int 224) <<Int 16) +Int ((((I &Int 4032) >>Int 6) +Int 128) <<Int 8) +Int ((I &Int 63) +Int 128), BE, Unsigned)
requires I >=Int 2048 andBool I <=Int 65535
rule #encodeUTF8 (I) => Int2Bytes(((((I &Int 1835008) >>Int 18) +Int 240) <<Int 24) +Int ((((I &Int 258048) >>Int 12) +Int 128) <<Int 16) +Int ((((I &Int 4032) >>Int6) +Int 128) <<Int 8) +Int ((I &Int 63) +Int 128), BE, Unsigned)
requires I >=Int 65536 andBool I <=Int 1114111
rule unescape(S, IDX, SB) => unescape(S, #idxCloseBracket(S, IDX) +Int 1, SB +String Bytes2String(#encodeUTF8(String2Base(substrString(S, IDX +Int 3, #idxCloseBracket(S, IDX +Int 3)), 16))))
requires substrString(S, IDX, IDX +Int 1) ==K "\\"
andBool substrString(S, IDX +Int 1, IDX +Int 2) ==K "u"
DataString
, as is defined in the wasm semantics, is a list of WasmString
s.
#concatDS
concatenates them together into a single string.
The strings to connect needs to be unescaped before concatenated, because the unescape
function removes the quote sign "
before and after each substring.
syntax String ::= #concatDS ( DataString ) [function]
| #concatDS ( DataString, String ) [function, klabel(#concatDSAux)]
// -----------------------------------------------------------------------------------
rule #concatDS ( DS ) => #concatDS ( DS, "" )
rule #concatDS ( .DataString , S ) => S
rule #concatDS ( WS:WasmStringToken DS , S ) => #concatDS ( DS , S +String unescape(#parseWasmString(WS)) )
#DS2Bytes
converts a DataString
to a K builtin Bytes
.
syntax Bytes ::= #DS2Bytes (DataString) [function]
// --------------------------------------------------
rule #DS2Bytes(DS) => String2Bytes(#concatDS(DS))
Wasm memories are byte arrays, sized in pages of 65536 bytes, initialized to be all zero bytes.
#setBytesRange(BM, START, BS)
assigns a contiguous chunk ofBS
toBM
starting at positionN
.#setRange(BM, START, VAL, WIDTH)
writes the integerI
to memory as bytes (little-endian), starting at indexN
.
syntax Bytes ::= #setBytesRange ( Bytes , Int , Bytes ) [function, functional]
// ------------------------------------------------------------------------------
rule #setBytesRange(BM, START, BS) => replaceAtBytes(padRightBytes(BM, START +Int lengthBytes(BS), 0), START, BS)
requires 0 <=Int START
rule #setBytesRange(_, START, _ ) => .Bytes
requires notBool (0 <=Int START)
syntax Bytes ::= #setRange ( Bytes , Int , Int , Int ) [function, functional, smtlib(setRange)]
// -----------------------------------------------------------------------------------------------
rule #setRange(BM, ADDR, VAL, WIDTH) => BM
requires notBool (0 <Int WIDTH andBool 0 <=Int VAL andBool 0 <=Int ADDR)
rule [setRange-Positive] : #setRange(BM, ADDR, VAL, WIDTH) => #setBytesRange(BM, ADDR, Int2Bytes(WIDTH, VAL, LE))
requires 0 <Int WIDTH andBool 0 <=Int VAL andBool 0 <=Int ADDR
#getBytesRange(BM, START, WIDTH)
reads offWIDTH
elements fromBM
beginning at positionSTART
(padding with zeros as needed).#getRange(BM, START, WIDTH)
reads offWIDTH
elements fromBM
beginning at positionSTART
, and converts it into an unsigned integer. The function interprets the range of bytes as little-endian.
syntax Bytes ::= #getBytesRange ( Bytes , Int , Int ) [function, functional]
// ----------------------------------------------------------------------------
rule #getBytesRange(_, START, WIDTH) => .Bytes
requires notBool (0 <=Int START andBool 0 <=Int WIDTH)
rule #getBytesRange(BM, START, WIDTH) => substrBytes(padRightBytes(BM, START +Int WIDTH, 0), START, START +Int WIDTH)
requires (0 <=Int START andBool 0 <=Int WIDTH)
andBool START <Int lengthBytes(BM)
rule #getBytesRange(BM, START, WIDTH) => padRightBytes(.Bytes, WIDTH, 0)
requires (0 <=Int START andBool 0 <=Int WIDTH)
andBool notBool (START <Int lengthBytes(BM))
syntax Int ::= #getRange(Bytes, Int, Int) [function, functional, smtlib(getRange)]
// ----------------------------------------------------------------------------------
rule #getRange( _, ADDR, WIDTH) => 0
requires notBool (0 <Int WIDTH andBool 0 <=Int ADDR)
rule [getRange-Positive] : #getRange(BM, ADDR, WIDTH) => Bytes2Int(#getBytesRange(BM, ADDR, WIDTH), LE, Unsigned)
requires 0 <Int WIDTH andBool 0 <=Int ADDR
endmodule