diff --git a/collector/compile-benchmarks/README.md b/collector/compile-benchmarks/README.md index 1cd6cf93a..10be4a8ba 100644 --- a/collector/compile-benchmarks/README.md +++ b/collector/compile-benchmarks/README.md @@ -26,6 +26,7 @@ They mostly consist of real-world crates. Rust programs. - **cranelift-codegen-0.82.1**: The largest crate from a code generator. Used by wasmtime. Stresses obligation processing. +- **cranelift-codegen-0.119.0**: The largest crate from a code generator. Used by wasmtime. Stresses obligation processing. - **diesel-1.4.8**: A type safe SQL query builder. Utilizes the type system to ensure a lot of invariants. Stresses anything related to resolving trait bounds, by having a lot of trait impls for a large number of different diff --git a/collector/compile-benchmarks/REUSE.toml b/collector/compile-benchmarks/REUSE.toml index bf3933803..1f2a6d23e 100644 --- a/collector/compile-benchmarks/REUSE.toml +++ b/collector/compile-benchmarks/REUSE.toml @@ -47,6 +47,11 @@ path = "cranelift-codegen-0.82.1/**" SPDX-FileCopyrightText = "The Cranelift Project Developers" SPDX-License-Identifier = "Apache-2.0 WITH LLVM-exception" +[[annotations]] +path = "cranelift-codegen-0.119.0/**" +SPDX-FileCopyrightText = "The Cranelift Project Developers" +SPDX-License-Identifier = "Apache-2.0 WITH LLVM-exception" + +[[annotations]] path = "ctfe-stress-5/**" SPDX-FileCopyrightText = "The Rust Project Developers (see https://thanks.rust-lang.org)" diff --git a/collector/compile-benchmarks/cranelift-codegen-0.119.0/.cargo_vcs_info.json b/collector/compile-benchmarks/cranelift-codegen-0.119.0/.cargo_vcs_info.json new file mode 100644 index 000000000..7de0ccdf8 --- /dev/null +++ b/collector/compile-benchmarks/cranelift-codegen-0.119.0/.cargo_vcs_info.json @@ -0,0 +1,6 @@ +{ + "git": { + "sha1": "d3054950c67c340491f4f48021e58c11002e7513" + }, + "path_in_vcs": "cranelift/codegen" +} \ No newline at end of file diff --git a/collector/compile-benchmarks/cranelift-codegen-0.119.0/0-println.patch b/collector/compile-benchmarks/cranelift-codegen-0.119.0/0-println.patch new file mode 100644 index 000000000..af67f597d --- /dev/null +++ b/collector/compile-benchmarks/cranelift-codegen-0.119.0/0-println.patch @@ -0,0 +1,12 @@ +diff --git a/src/loop_analysis.rs b/src/loop_analysis.rs +index 71f84565..e8e2dcc6 100644 +--- a/src/loop_analysis.rs ++++ b/src/loop_analysis.rs +@@ -230,6 +230,7 @@ impl LoopAnalysis { + domtree: &DominatorTree, + layout: &Layout, + ) { ++ println!("testing"); + let mut stack: Vec<Block> = Vec::new(); + // We handle each loop header in reverse order, corresponding to a pseudo postorder + // traversal of the graph. diff --git a/collector/compile-benchmarks/cranelift-codegen-0.119.0/Cargo.lock b/collector/compile-benchmarks/cranelift-codegen-0.119.0/Cargo.lock new file mode 100644 index 000000000..6ad45ab4e --- /dev/null +++ b/collector/compile-benchmarks/cranelift-codegen-0.119.0/Cargo.lock @@ -0,0 +1,1111 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing.
+version = 4 + +[[package]] +name = "aho-corasick" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43f6cb1bf222025340178f382c426f13757b2960e89779dfcb319c32542a5a41" +dependencies = [ + "memchr", +] + +[[package]] +name = "allocator-api2" +version = "0.2.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45862d1c77f2228b9e10bc609d5bc203d86ebc9b87ad8d5d5167a6c9abf739d9" + +[[package]] +name = "anes" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" + +[[package]] +name = "anstream" +version = "0.6.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64e15c1ab1f89faffbf04a634d5e1962e9074f2741eef6d97f3c4e322426d526" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1" + +[[package]] +name = "anstyle-parse" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "938874ff5980b03a87c5524b3ae5b59cf99b1d6bc836848df7bc5ada9643c333" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ca11d4be1bab0c8bc8734a9aa7bf4ee8316d462a08c6ac5052f888fef5b494b" +dependencies = [ + "windows-sys 0.48.0", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5bf74e1b6e971609db8ca7a9ce79fd5768ab6ae46441c572e46cf596f59e57f8" +dependencies = [ + "anstyle", + "windows-sys 0.52.0", +] + +[[package]] +name = "anyhow" +version = "1.0.93" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c95c10ba0b00a02636238b814946408b1322d5ac4760326e6fb8ec956d85775" + +[[package]] +name = "arbitrary" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dde20b3d026af13f561bdd0f15edf01fc734f0dafcedbaf42bba506a9517f223" + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "bitflags" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" + +[[package]] +name = "block-buffer" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bf7fe51849ea569fd452f37822f606a5cabb684dc918707a0193fd4664ff324" +dependencies = [ + "generic-array", +] + +[[package]] +name = "bumpalo" +version = "3.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" +dependencies = [ + "allocator-api2", +] + +[[package]] +name = "capstone" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "015ef5d5ca1743e3f94af9509ba6bd2886523cfee46e48d15c2ef5216fd4ac9a" +dependencies = [ + "capstone-sys", + "libc", +] + +[[package]] +name = "capstone-sys" +version = "0.17.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "2267cb8d16a1e4197863ec4284ffd1aec26fe7e57c58af46b02590a0235809a0" +dependencies = [ + "cc", + "libc", +] + +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + +[[package]] +name = "cc" +version = "1.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c31a0499c1dc64f458ad13872de75c0eb7e3fdb0e67964610c914b034fc5956e" +dependencies = [ + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "ciborium" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0c137568cc60b904a7724001b35ce2630fd00d5d84805fbb608ab89509d788f" +dependencies = [ + "ciborium-io", + "ciborium-ll", + "serde", +] + +[[package]] +name = "ciborium-io" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "346de753af073cc87b52b2083a506b38ac176a44cfb05497b622e27be899b369" + +[[package]] +name = "ciborium-ll" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "213030a2b5a4e0c0892b6652260cf6ccac84827b83a85a534e178e3906c4cf1b" +dependencies = [ + "ciborium-io", + "half", +] + +[[package]] +name = "clap" +version = "4.5.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e5a21b8495e732f1b3c364c9949b201ca7bae518c502c80256c96ad79eaf6ac" +dependencies = [ + "clap_builder", +] + +[[package]] +name = "clap_builder" +version = "4.5.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8cf2dd12af7a047ad9d6da2b6b249759a22a7abc0f474c1dae1777afa4b21a73" +dependencies = [ + "anstyle", + "clap_lex", +] + +[[package]] +name = "clap_lex" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" + +[[package]] +name = "cobs" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67ba02a97a2bd10f4b59b25c7973101c79642302776489e030cd13cdab09ed15" + +[[package]] +name = "codespan-reporting" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3538270d33cc669650c4b093848450d380def10c331d38c768e34cac80576e6e" +dependencies = [ + "termcolor", + "unicode-width", +] + +[[package]] +name = "colorchoice" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" + +[[package]] +name = "cpufeatures" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e4c1eaa2012c47becbbad2ab175484c2a84d1185b566fb2cc5b8707343dfe58" +dependencies = [ + "libc", +] + +[[package]] +name = "cranelift-assembler-x64" +version = "0.119.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "263cc79b8a23c29720eb596d251698f604546b48c34d0d84f8fd2761e5bf8888" +dependencies = [ + "cranelift-assembler-x64-meta", +] + +[[package]] +name = "cranelift-assembler-x64-meta" +version = "0.119.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5b4a113455f8c0e13e3b3222a9c38d6940b958ff22573108be083495c72820e1" +dependencies = [ + "cranelift-srcgen", +] + +[[package]] +name = "cranelift-bforest" +version = "0.119.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "58f96dca41c5acf5d4312c1d04b3391e21a312f8d64ce31a2723a3bb8edd5d4d" +dependencies = [ + "cranelift-entity", +] + +[[package]] +name = "cranelift-bitset" +version = "0.119.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d821ed698dd83d9c012447eb63a5406c1e9c23732a2f674fb5b5015afd42202" +dependencies = [ + "serde", + "serde_derive", +] + +[[package]] +name = "cranelift-codegen" +version = "0.119.0" +dependencies = [ + "anyhow", + "bumpalo", + "capstone", + "cranelift-assembler-x64", + "cranelift-bforest", + "cranelift-bitset", + "cranelift-codegen-meta", + "cranelift-codegen-shared", + "cranelift-control", + "cranelift-entity", + "cranelift-isle", + "criterion", + "env_logger", + "gimli", + "hashbrown", + "log", + "postcard", + "pulley-interpreter", + "regalloc2", + "rustc-hash", + "serde", + "serde_derive", + "sha2", + "similar", + "smallvec", + "souper-ir", + "target-lexicon", +] + +[[package]] +name = "cranelift-codegen-meta" +version = "0.119.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af2c215e0c9afa8069aafb71d22aa0e0dde1048d9a5c3c72a83cacf9b61fcf4a" +dependencies = [ + "cranelift-assembler-x64-meta", + "cranelift-codegen-shared", + "cranelift-srcgen", + "pulley-interpreter", +] + +[[package]] +name = "cranelift-codegen-shared" +version = "0.119.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97524b2446fc26a78142132d813679dda19f620048ebc9a9fbb0ac9f2d320dcb" + +[[package]] +name = "cranelift-control" +version = "0.119.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e32e900aee81f9e3cc493405ef667a7812cb5c79b5fc6b669e0a2795bda4b22" +dependencies = [ + "arbitrary", +] + +[[package]] +name = "cranelift-entity" +version = "0.119.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d16a2e28e0fa6b9108d76879d60fe1cc95ba90e1bcf52bac96496371044484ee" +dependencies = [ + "cranelift-bitset", + "serde", + "serde_derive", +] + +[[package]] +name = "cranelift-isle" +version = "0.119.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e916f36f183e377e9a3ed71769f2721df88b72648831e95bb9fa6b0cd9b1c709" +dependencies = [ + "codespan-reporting", +] + +[[package]] +name = "cranelift-srcgen" +version = "0.119.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47e1a86340a16e74b4285cc86ac69458fa1c8e7aaff313da4a89d10efd3535ee" + +[[package]] +name = "criterion" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" +dependencies = [ + "anes", + "cast", + "ciborium", + "clap", + "criterion-plot", + "is-terminal", + "itertools", + "num-traits", + "once_cell", + "oorandom", + "rayon", + "regex", + "serde", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" +dependencies = [ + "cast", + "itertools", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "6455c0ca19f0d2fbf751b908d5c55c1f5cbc65e03c4225427254b46890bdde1e" +dependencies = [ + "cfg-if", + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" + +[[package]] +name = "crypto-common" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", +] + +[[package]] +name = "either" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" + +[[package]] +name = "embedded-io" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef1a6892d9eef45c8fa6b9e0086428a2cca8491aca8f787c534a3d6d0bcb3ced" + +[[package]] +name = "env_filter" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f2c92ceda6ceec50f43169f9ee8424fe2db276791afde7b2cd8bc084cb376ab" +dependencies = [ + "log", + "regex", +] + +[[package]] +name = "env_logger" +version = "0.11.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e13fa619b91fb2381732789fc5de83b45675e882f66623b7d8cb4f643017018d" +dependencies = [ + "anstream", + "anstyle", + "env_filter", + "humantime", + "log", +] + +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + +[[package]] +name = "errno" +version = "0.3.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d" +dependencies = [ + "libc", + "windows-sys 0.59.0", +] + +[[package]] +name = "fallible-iterator" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" + +[[package]] +name = "generic-array" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd48d33ec7f05fbfa152300fdad764757cbded343c1aa1cff2fbaf4134851803" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "gimli" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32085ea23f3234fc7846555e85283ba4de91e21016dc0455a16286d87a292d64" +dependencies = [ + "fallible-iterator", + "indexmap", + "stable_deref_trait", +] + +[[package]] +name = "half" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" + +[[package]] +name = "hashbrown" +version = "0.15.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" + +[[package]] +name = "hermit-abi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" + +[[package]] +name = "humantime" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" + +[[package]] +name = "id-arena" +version = "2.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25a2bc672d1148e28034f176e01fffebb08b35768468cc954630da77a1449005" + +[[package]] +name = "indexmap" +version = "2.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62f822373a4fe84d4bb149bf54e584a7f4abec90e072ed49cda0edea5b95471f" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "is-terminal" +version = "0.4.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bad00257d07be169d870ab665980b06cdb366d792ad690bf2e76876dc503455" +dependencies = [ + "hermit-abi", + "rustix", + "windows-sys 0.52.0", +] + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" + +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674" + +[[package]] +name = "libc" +version = "0.2.171" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6" + +[[package]] +name = "linux-raw-sys" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" + +[[package]] +name = "log" +version = "0.4.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" + +[[package]] +name = "memchr" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + +[[package]] +name = "oorandom" +version = "11.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" + +[[package]] +name = "postcard" +version = "1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a55c51ee6c0db07e68448e336cf8ea4131a620edefebf9893e759b2d793420f8" +dependencies = [ + "cobs", + 
"embedded-io", + "serde", +] + +[[package]] +name = "proc-macro2" +version = "1.0.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "pulley-interpreter" +version = "32.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69c819888a64024f9c6bc7facbed99dfb4dd0124abe4335b6a54eabaa68ef506" +dependencies = [ + "cranelift-bitset", + "log", +] + +[[package]] +name = "quote" +version = "1.0.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rayon" +version = "1.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd99e5772ead8baa5215278c9b15bf92087709e9c1b2d1f97cdb5a183c933a7d" +dependencies = [ + "autocfg", + "crossbeam-deque", + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ce3fb6ad83f861aac485e76e1985cd109d9a3713802152be56c3b1f0e0658ed" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + +[[package]] +name = "regalloc2" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc06e6b318142614e4a48bc725abbf08ff166694835c43c9dae5a9009704639a" +dependencies = [ + "allocator-api2", + "bumpalo", + "hashbrown", + "log", + "rustc-hash", + "serde", + "smallvec", +] + +[[package]] +name = "regex" +version = "1.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2eae68fc220f7cf2532e4494aded17545fce192d59cd996e0fe7887f4ceb575" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39354c10dd07468c2e73926b23bb9c2caca74c5501e38a35da70406f1d923310" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5ea92a5b6195c6ef2a0295ea818b312502c6fc94dde986c5553242e18fd4ce2" + +[[package]] +name = "rustc-hash" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" + +[[package]] +name = "rustix" +version = "0.38.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a78891ee6bf2340288408954ac787aa063d8e8817e9f53abb37c695c6d834ef6" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.59.0", +] + +[[package]] +name = "ryu" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73b4b750c782965c211b42f022f59af1fbceabdd026623714f104152f1ec149f" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "serde" +version = "1.0.215" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6513c1ad0b11a9376da888e3e0baa0077f1aed55c17f50e7b2397136129fb88f" +dependencies = [ + "serde_derive", +] + 
+[[package]] +name = "serde_derive" +version = "1.0.215" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad1e866f866923f252f05c889987993144fb74e722403468a4ebd70c3cd756c0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.140" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373" +dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", +] + +[[package]] +name = "sha2" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55deaec60f81eefe3cce0dc50bda92d6d8e88f2a27df7c5033b42afeb1ed2676" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "similar" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62ac7f900db32bf3fd12e0117dd3dc4da74bc52ebaac97f39668446d89694803" + +[[package]] +name = "smallvec" +version = "1.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" +dependencies = [ + "serde", +] + +[[package]] +name = "souper-ir" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a50c18ce33988e1973003afbaa66e6a465ad7a614dc33f246879ccc209c2c044" +dependencies = [ + "id-arena", +] + +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + +[[package]] +name = "syn" +version = "2.0.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b09a44accad81e1ba1cd74a32461ba89dee89095ba17b32f5d03683b1b1fc2a0" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "target-lexicon" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ff4a4048091358129767b8a200d6927f58876c8b5ea16fb7b0222d43b79bfa8" + +[[package]] +name = "termcolor" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + +[[package]] +name = "typenum" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987" + +[[package]] +name = "unicode-ident" +version = "1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4" + +[[package]] +name = "unicode-width" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973" + +[[package]] +name = "utf8parse" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" + +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + +[[package]] +name = "walkdir" +version = "2.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36df944cda56c7d8d8b7496af378e6b16de9284591917d307c9b4d313c44e698" +dependencies = [ + "same-file", + "winapi-util", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +dependencies = [ + "winapi", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.5", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", + "windows_i686_gnullvm", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" diff --git a/collector/compile-benchmarks/cranelift-codegen-0.119.0/Cargo.toml b/collector/compile-benchmarks/cranelift-codegen-0.119.0/Cargo.toml new file mode 100644 index 000000000..1a1a2455f --- /dev/null +++ b/collector/compile-benchmarks/cranelift-codegen-0.119.0/Cargo.toml @@ -0,0 +1,252 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. +# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). 
+# See Cargo.toml.orig for the original contents. + +[package] +edition = "2021" +rust-version = "1.84.0" +name = "cranelift-codegen" +version = "0.119.0" +authors = ["The Cranelift Project Developers"] +build = "build.rs" +autolib = false +autobins = false +autoexamples = false +autotests = false +autobenches = false +description = "Low-level code generator library" +documentation = "https://docs.rs/cranelift-codegen" +readme = "README.md" +keywords = [ + "compile", + "compiler", + "jit", +] +categories = ["no-std"] +license = "Apache-2.0 WITH LLVM-exception" +repository = "https://github.com/bytecodealliance/wasmtime" + +[package.metadata.docs.rs] +features = ["all-arch"] + +[features] +all-arch = [ + "all-native-arch", + "pulley", +] +all-native-arch = [ + "x86", + "arm64", + "s390x", + "riscv64", +] +arm64 = [] +core = [] +default = [ + "std", + "unwind", + "host-arch", + "timing", +] +disas = [ + "anyhow", + "capstone", +] +enable-serde = [ + "serde", + "serde_derive", + "cranelift-entity/enable-serde", + "cranelift-bitset/enable-serde", + "regalloc2/enable-serde", + "smallvec/serde", +] +host-arch = [] +incremental-cache = [ + "enable-serde", + "postcard", + "sha2", +] +isle-errors = ["cranelift-isle/fancy-errors"] +pulley = [ + "dep:pulley-interpreter", + "pulley-interpreter/encode", + "pulley-interpreter/disas", + "cranelift-codegen-meta/pulley", +] +riscv64 = [] +s390x = [] +souper-harvest = [ + "souper-ir", + "souper-ir/stringify", +] +std = ["serde?/std"] +timing = [] +trace-log = ["regalloc2/trace-log"] +unwind = ["gimli"] +x86 = [] + +[lib] +name = "cranelift_codegen" +path = "src/lib.rs" + +[[bench]] +name = "x64-evex-encoding" +path = "benches/x64-evex-encoding.rs" +harness = false + +[dependencies.anyhow] +version = "1.0.93" +features = ["std"] +optional = true +default-features = false + +[dependencies.bumpalo] +version = "3" + +[dependencies.capstone] +version = "0.13.0" +optional = true + +[dependencies.cranelift-assembler-x64] +version = "0.119.0" + +[dependencies.cranelift-bforest] +version = "0.119.0" + +[dependencies.cranelift-bitset] +version = "0.119.0" + +[dependencies.cranelift-codegen-shared] +version = "0.119.0" + +[dependencies.cranelift-control] +version = "0.119.0" + +[dependencies.cranelift-entity] +version = "0.119.0" + +[dependencies.gimli] +version = "0.31.0" +features = [ + "read", + "write", + "std", +] +optional = true +default-features = false + +[dependencies.hashbrown] +version = "0.15" +default-features = false + +[dependencies.log] +version = "0.4.8" +default-features = false + +[dependencies.postcard] +version = "1.0.8" +features = ["alloc"] +optional = true +default-features = false + +[dependencies.pulley-interpreter] +version = "=32.0.0" +optional = true + +[dependencies.regalloc2] +version = "0.11.2" +features = ["checker"] + +[dependencies.rustc-hash] +version = "2.0.0" + +[dependencies.serde] +version = "1.0.215" +features = ["alloc"] +optional = true +default-features = false + +[dependencies.serde_derive] +version = "1.0.188" +optional = true + +[dependencies.sha2] +version = "0.10.2" +optional = true + +[dependencies.smallvec] +version = "1.6.1" +features = ["union"] + +[dependencies.souper-ir] +version = "2.1.0" +optional = true + +[dependencies.target-lexicon] +version = "0.13.0" + +[dev-dependencies.criterion] +version = "0.5.0" +features = [ + "html_reports", + "rayon", +] +default-features = false + +[dev-dependencies.env_logger] +version = "0.11.5" + +[dev-dependencies.similar] +version = "2.1.0" + 
+[build-dependencies.cranelift-codegen-meta] +version = "0.119.0" + +[build-dependencies.cranelift-isle] +version = "=0.119.0" + +[lints.clippy] +allow_attributes_without_reason = "warn" +clone_on_copy = "warn" +manual_strip = "warn" +map_clone = "warn" +uninlined_format_args = "warn" +unnecessary_cast = "warn" +unnecessary_fallible_conversions = "warn" +unnecessary_mut_passed = "warn" +unnecessary_to_owned = "warn" + +[lints.clippy.all] +level = "allow" +priority = -1 + +[lints.rust] +deprecated-safe-2024 = "warn" +impl-trait-overcaptures = "warn" +keyword_idents_2024 = "warn" +missing-unsafe-on-extern = "warn" +rust-2024-guarded-string-incompatible-syntax = "warn" +rust-2024-incompatible-pat = "warn" +rust-2024-prelude-collisions = "warn" +trivial_numeric_casts = "warn" +unsafe-attr-outside-unsafe = "warn" +unstable_features = "warn" +unused-lifetimes = "warn" +unused-macro-rules = "warn" +unused_extern_crates = "warn" +unused_import_braces = "warn" + +[lints.rust.unexpected_cfgs] +level = "warn" +priority = 0 +check-cfg = [ + "cfg(pulley_tail_calls)", + "cfg(pulley_assume_llvm_makes_tail_calls)", +] diff --git a/collector/compile-benchmarks/cranelift-codegen-0.119.0/Cargo.toml.orig b/collector/compile-benchmarks/cranelift-codegen-0.119.0/Cargo.toml.orig new file mode 100644 index 000000000..fccc1354a --- /dev/null +++ b/collector/compile-benchmarks/cranelift-codegen-0.119.0/Cargo.toml.orig @@ -0,0 +1,132 @@ +[package] +authors = ["The Cranelift Project Developers"] +name = "cranelift-codegen" +version = "0.119.0" +description = "Low-level code generator library" +license = "Apache-2.0 WITH LLVM-exception" +documentation = "https://docs.rs/cranelift-codegen" +repository = "https://github.com/bytecodealliance/wasmtime" +categories = ["no-std"] +readme = "README.md" +keywords = ["compile", "compiler", "jit"] +build = "build.rs" +edition.workspace = true +rust-version.workspace = true + +[lints] +workspace = true + +[package.metadata.docs.rs] +# Ask Cargo to build docs with the feature `all-arch` +features = ["all-arch"] + +[dependencies] +anyhow = { workspace = true, optional = true, features = ['std'] } +bumpalo = "3" +capstone = { workspace = true, optional = true } +cranelift-assembler-x64 = { workspace = true } +cranelift-codegen-shared = { path = "./shared", version = "0.119.0" } +cranelift-entity = { workspace = true } +cranelift-bforest = { workspace = true } +cranelift-bitset = { workspace = true } +cranelift-control = { workspace = true } +hashbrown = { workspace = true } +target-lexicon = { workspace = true } +log = { workspace = true } +serde = { workspace = true, optional = true } +serde_derive = { workspace = true, optional = true } +pulley-interpreter = { workspace = true, optional = true } +postcard = { workspace = true, optional = true } +gimli = { workspace = true, features = ["write", "std"], optional = true } +smallvec = { workspace = true } +regalloc2 = { workspace = true, features = ["checker"] } +souper-ir = { version = "2.1.0", optional = true } +sha2 = { version = "0.10.2", optional = true } +rustc-hash = { workspace = true } +# It is a goal of the cranelift-codegen crate to have minimal external dependencies. +# Please don't add any unless they are essential to the task of creating binary +# machine code. Integration tests that need external dependencies can be +# accommodated in `tests`. 
+ +[dev-dependencies] +criterion = { workspace = true } +similar = "2.1.0" +env_logger = { workspace = true } + +[build-dependencies] +cranelift-codegen-meta = { path = "meta", version = "0.119.0" } +cranelift-isle = { path = "../isle/isle", version = "=0.119.0" } + +[features] +default = ["std", "unwind", "host-arch", "timing"] + +# The "std" feature enables use of libstd. The "core" feature enables use +# of some minimal std-like replacement libraries. At least one of these two +# features need to be enabled. +std = ["serde?/std"] + +# The "core" feature used to enable a hashmap workaround, but is now +# deprecated (we (i) always use hashbrown, and (ii) don't support a +# no_std build anymore). The feature remains for backward +# compatibility as a no-op. +core = [] + +# Enable the `to_capstone` method on TargetIsa, for constructing a Capstone +# context, and the `disassemble` method on `MachBufferFinalized`. +disas = ["anyhow", "capstone"] + +# Enables detailed logging which can be somewhat expensive. +trace-log = ["regalloc2/trace-log"] + +# This enables unwind info generation functionality. +unwind = ["gimli"] + +# ISA targets for which we should build. +# If no ISA targets are explicitly enabled, the ISA target for the host machine is enabled. +x86 = [] +arm64 = [] +s390x = [] +riscv64 = [] +pulley = [ + "dep:pulley-interpreter", + "pulley-interpreter/encode", + "pulley-interpreter/disas", + "cranelift-codegen-meta/pulley", +] +# Enable the ISA target for the host machine +host-arch = [] + +# Option to enable all architectures. +all-arch = ["all-native-arch", "pulley"] + +# Option to enable all architectures that correspond to an actual native target +# (that is, exclude Pulley). +all-native-arch = ["x86", "arm64", "s390x", "riscv64"] + +# For dependent crates that want to serialize some parts of cranelift +enable-serde = [ + "serde", + "serde_derive", + "cranelift-entity/enable-serde", + "cranelift-bitset/enable-serde", + "regalloc2/enable-serde", + "smallvec/serde", +] + +# Enable the incremental compilation cache for hot-reload use cases. +incremental-cache = ["enable-serde", "postcard", "sha2"] + +# Enable support for the Souper harvester. +souper-harvest = ["souper-ir", "souper-ir/stringify"] + +# Report any ISLE errors in pretty-printed style. +isle-errors = ["cranelift-isle/fancy-errors"] + +# Enable tracking how long passes take in Cranelift. +# +# Enabled by default. +timing = [] + +[[bench]] +name = "x64-evex-encoding" +harness = false diff --git a/collector/compile-benchmarks/cranelift-codegen-0.119.0/LICENSE b/collector/compile-benchmarks/cranelift-codegen-0.119.0/LICENSE new file mode 100644 index 000000000..f9d81955f --- /dev/null +++ b/collector/compile-benchmarks/cranelift-codegen-0.119.0/LICENSE @@ -0,0 +1,220 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ + +--- LLVM Exceptions to the Apache 2.0 License ---- + +As an exception, if, as a result of your compiling your source code, portions +of this Software are embedded into an Object form of such source code, you +may redistribute such embedded portions in such Object form without complying +with the conditions of Sections 4(a), 4(b) and 4(d) of the License. + +In addition, if you combine or link compiled forms of this Software with +software that is licensed under the GPLv2 ("Combined Software") and if a +court of competent jurisdiction determines that the patent provision (Section +3), the indemnity provision (Section 9) or other Section of the License +conflicts with the conditions of the GPLv2, you may retroactively and +prospectively choose to deem waived or otherwise exclude such Section(s) of +the License, but only in their entirety and only with respect to the Combined +Software. + diff --git a/collector/compile-benchmarks/cranelift-codegen-0.119.0/README.md b/collector/compile-benchmarks/cranelift-codegen-0.119.0/README.md new file mode 100644 index 000000000..18b9756aa --- /dev/null +++ b/collector/compile-benchmarks/cranelift-codegen-0.119.0/README.md @@ -0,0 +1,2 @@ +This crate contains the core Cranelift code generator. It translates code from an +intermediate representation into executable machine code. diff --git a/collector/compile-benchmarks/cranelift-codegen-0.119.0/benches/x64-evex-encoding.rs b/collector/compile-benchmarks/cranelift-codegen-0.119.0/benches/x64-evex-encoding.rs new file mode 100644 index 000000000..dd280212c --- /dev/null +++ b/collector/compile-benchmarks/cranelift-codegen-0.119.0/benches/x64-evex-encoding.rs @@ -0,0 +1,52 @@ +//! Measure instruction encoding latency using various approaches; the +//! benchmarking is feature-gated on `x86` since it only measures the encoding +//! mechanism of that backend. + +#[cfg(feature = "x86")] +mod x86 { + use cranelift_codegen::isa::x64::encoding::{ + evex::{EvexInstruction, EvexVectorLength, Register}, + rex::{LegacyPrefixes, OpcodeMap}, + }; + use criterion::{criterion_group, Criterion}; + + // Define the benchmarks. + fn x64_evex_encoding_benchmarks(c: &mut Criterion) { + let mut group = c.benchmark_group("x64 EVEX encoding"); + let rax = Register::from(0); + let rdx = 2; + + group.bench_function("EvexInstruction (builder pattern)", |b| { + b.iter(|| { + let mut sink = cranelift_codegen::MachBuffer::new(); + EvexInstruction::new() + .prefix(LegacyPrefixes::_66) + .map(OpcodeMap::_0F38) + .w(true) + .opcode(0x1F) + .reg(rax) + .rm(rdx) + .length(EvexVectorLength::V128) + .encode(&mut sink); + }); + }); + } + criterion_group!(benches, x64_evex_encoding_benchmarks); + + /// Using an inner module to feature-gate the benchmarks means that we must + /// manually specify how to run the benchmarks (see `criterion_main!`). + pub fn run_benchmarks() { + benches(); + Criterion::default().configure_from_args().final_summary(); + } +} + +fn main() { + #[cfg(feature = "x86")] + x86::run_benchmarks(); + + #[cfg(not(feature = "x86"))] + println!( + "Unable to run the x64-evex-encoding benchmark; the `x86` feature must be enabled in Cargo.", + ); +} diff --git a/collector/compile-benchmarks/cranelift-codegen-0.119.0/build.rs b/collector/compile-benchmarks/cranelift-codegen-0.119.0/build.rs new file mode 100644 index 000000000..b06b42ae8 --- /dev/null +++ b/collector/compile-benchmarks/cranelift-codegen-0.119.0/build.rs @@ -0,0 +1,225 @@ +// Build script. +// +// This program is run by Cargo when building cranelift-codegen. 
It is used to generate Rust code from +// the language definitions in the cranelift-codegen/meta directory. +// +// Environment: +// +// OUT_DIR +// Directory where generated files should be placed. +// +// TARGET +// Target triple provided by Cargo. +// +// The build script expects to be run from the directory where this build.rs file lives. The +// current directory is used to find the sources. + +use cranelift_codegen_meta as meta; +use cranelift_isle::error::Errors; +use meta::isle::IsleCompilation; + +use std::env; +use std::io::Read; +use std::process; +use std::time::Instant; + +fn main() { + let start_time = Instant::now(); + + let out_dir = env::var("OUT_DIR").expect("The OUT_DIR environment variable must be set"); + let out_dir = std::path::Path::new(&out_dir); + let target_triple = env::var("TARGET").expect("The TARGET environment variable must be set"); + + let all_arch = env::var("CARGO_FEATURE_ALL_ARCH").is_ok(); + let all_native_arch = env::var("CARGO_FEATURE_ALL_NATIVE_ARCH").is_ok(); + + let mut isas = meta::isa::Isa::all() + .iter() + .cloned() + .filter(|isa| { + let env_key = match isa { + meta::isa::Isa::Pulley32 | meta::isa::Isa::Pulley64 => { + "CARGO_FEATURE_PULLEY".to_string() + } + _ => format!("CARGO_FEATURE_{}", isa.to_string().to_uppercase()), + }; + all_arch || env::var(env_key).is_ok() + }) + .collect::>(); + + // Don't require host isa if under 'all-arch' feature. + let host_isa = env::var("CARGO_FEATURE_HOST_ARCH").is_ok() && !all_native_arch; + + if isas.is_empty() || host_isa { + // Try to match native target. + let target_name = target_triple.split('-').next().unwrap(); + if let Ok(isa) = meta::isa_from_arch(&target_name) { + println!("cargo:rustc-cfg=feature=\"{isa}\""); + isas.push(isa); + } + } + + let cur_dir = env::current_dir().expect("Can't access current working directory"); + let crate_dir = cur_dir.as_path(); + + println!("cargo:rerun-if-changed=build.rs"); + println!("cargo:rerun-if-env-changed=ISLE_SOURCE_DIR"); + + let isle_dir = if let Ok(path) = std::env::var("ISLE_SOURCE_DIR") { + // This will canonicalize any relative path in terms of the + // crate root, and will take any absolute path as overriding the + // `crate_dir`. 
+ crate_dir.join(&path) + } else { + out_dir.into() + }; + + std::fs::create_dir_all(&isle_dir).expect("Could not create ISLE source directory"); + + if let Err(err) = meta::generate(&isas, &out_dir, &isle_dir) { + eprintln!("Error: {err}"); + process::exit(1); + } + + if &std::env::var("SKIP_ISLE").unwrap_or("0".to_string()) != "1" { + if let Err(err) = build_isle(crate_dir, &isle_dir) { + eprintln!("Error: {err}"); + process::exit(1); + } + } + + if env::var("CRANELIFT_VERBOSE").is_ok() { + for isa in &isas { + println!("cargo:warning=Includes support for {} ISA", isa.to_string()); + } + println!( + "cargo:warning=Build step took {:?}.", + Instant::now() - start_time + ); + println!("cargo:warning=Generated files are in {}", out_dir.display()); + } + + let pkg_version = env::var("CARGO_PKG_VERSION").unwrap(); + let mut cmd = std::process::Command::new("git"); + cmd.arg("rev-parse") + .arg("HEAD") + .stdout(std::process::Stdio::piped()) + .current_dir(env::var("CARGO_MANIFEST_DIR").unwrap()); + let version = if let Ok(mut child) = cmd.spawn() { + let mut git_rev = String::new(); + child + .stdout + .as_mut() + .unwrap() + .read_to_string(&mut git_rev) + .unwrap(); + let status = child.wait().unwrap(); + if status.success() { + let git_rev = git_rev.trim().chars().take(9).collect::(); + format!("{pkg_version}-{git_rev}") + } else { + // not a git repo + pkg_version + } + } else { + // git not available + pkg_version + }; + std::fs::write( + std::path::Path::new(&out_dir).join("version.rs"), + format!( + "/// Version number of this crate. \n\ + pub const VERSION: &str = \"{version}\";" + ), + ) + .unwrap(); +} + +/// Strip the current directory from the file paths, because `islec` +/// includes them in the generated source, and this helps us maintain +/// deterministic builds that don't include those local file paths. +fn make_isle_source_path_relative( + cur_dir: &std::path::Path, + filename: &std::path::Path, +) -> std::path::PathBuf { + if let Ok(suffix) = filename.strip_prefix(&cur_dir) { + suffix.to_path_buf() + } else { + filename.to_path_buf() + } +} + +fn build_isle( + crate_dir: &std::path::Path, + isle_dir: &std::path::Path, +) -> Result<(), Box> { + let cur_dir = std::env::current_dir()?; + let isle_compilations = meta::isle::get_isle_compilations( + &make_isle_source_path_relative(&cur_dir, &crate_dir), + &make_isle_source_path_relative(&cur_dir, &isle_dir), + ); + + let mut had_error = false; + for compilation in &isle_compilations.items { + for file in &compilation.inputs { + println!("cargo:rerun-if-changed={}", file.display()); + } + + if let Err(e) = run_compilation(compilation) { + had_error = true; + eprintln!("Error building ISLE files:"); + eprintln!("{e:?}"); + #[cfg(not(feature = "isle-errors"))] + { + eprintln!("To see a more detailed error report, run: "); + eprintln!(); + eprintln!(" $ cargo check -p cranelift-codegen --features isle-errors"); + eprintln!(); + } + } + } + + if had_error { + std::process::exit(1); + } + + println!("cargo:rustc-env=ISLE_DIR={}", isle_dir.to_str().unwrap()); + + Ok(()) +} + +/// Build ISLE DSL source text into generated Rust code. +/// +/// NB: This must happen *after* the `cranelift-codegen-meta` functions, since +/// it consumes files generated by them. 
+fn run_compilation(compilation: &IsleCompilation) -> Result<(), Errors> { + use cranelift_isle as isle; + + eprintln!("Rebuilding {}", compilation.output.display()); + + let code = { + let file_paths = compilation + .inputs + .iter() + .chain(compilation.untracked_inputs.iter()); + + let mut options = isle::codegen::CodegenOptions::default(); + // Because we include!() the generated ISLE source, we cannot + // put the global pragmas (`#![allow(...)]`) in the ISLE + // source itself; we have to put them in the source that + // include!()s it. (See + // https://github.com/rust-lang/rust/issues/47995.) + options.exclude_global_allow_pragmas = true; + + isle::compile::from_files(file_paths, &options)? + }; + + eprintln!( + "Writing ISLE-generated Rust code to {}", + compilation.output.display() + ); + std::fs::write(&compilation.output, code) + .map_err(|e| Errors::from_io(e, "failed writing output"))?; + + Ok(()) +} diff --git a/collector/compile-benchmarks/cranelift-codegen-0.119.0/perf-config.json b/collector/compile-benchmarks/cranelift-codegen-0.119.0/perf-config.json new file mode 100644 index 000000000..710581fa8 --- /dev/null +++ b/collector/compile-benchmarks/cranelift-codegen-0.119.0/perf-config.json @@ -0,0 +1,4 @@ +{ + "artifact": "library", + "category": "primary" +} diff --git a/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/alias_analysis.rs b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/alias_analysis.rs new file mode 100644 index 000000000..6a6e9f274 --- /dev/null +++ b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/alias_analysis.rs @@ -0,0 +1,402 @@ +//! Alias analysis, consisting of a "last store" pass and a "memory +//! values" pass. These two passes operate as one fused pass, and so +//! are implemented together here. +//! +//! We partition memory state into several *disjoint pieces* of +//! "abstract state". There are a finite number of such pieces: +//! currently, we call them "heap", "table", "vmctx", and "other".Any +//! given address in memory belongs to exactly one disjoint piece. +//! +//! One never tracks which piece a concrete address belongs to at +//! runtime; this is a purely static concept. Instead, all +//! memory-accessing instructions (loads and stores) are labeled with +//! one of these four categories in the `MemFlags`. It is forbidden +//! for a load or store to access memory under one category and a +//! later load or store to access the same memory under a different +//! category. This is ensured to be true by construction during +//! frontend translation into CLIF and during legalization. +//! +//! Given that this non-aliasing property is ensured by the producer +//! of CLIF, we can compute a *may-alias* property: one load or store +//! may-alias another load or store if both access the same category +//! of abstract state. +//! +//! The "last store" pass helps to compute this aliasing: it scans the +//! code, finding at each program point the last instruction that +//! *might have* written to a given part of abstract state. +//! +//! We can't say for sure that the "last store" *did* actually write +//! that state, but we know for sure that no instruction *later* than +//! it (up to the current instruction) did. However, we can get a +//! must-alias property from this: if at a given load or store, we +//! look backward to the "last store", *AND* we find that it has +//! exactly the same address expression and type, then we know that +//! the current instruction's access *must* be to the same memory +//! 
location. +//! +//! To get this must-alias property, we compute a sparse table of +//! "memory values": these are known equivalences between SSA `Value`s +//! and particular locations in memory. The memory-values table is a +//! mapping from (last store, address expression, type) to SSA +//! value. At a store, we can insert into this table directly. At a +//! load, we can also insert, if we don't already have a value (from +//! the store that produced the load's value). +//! +//! Then we can do two optimizations at once given this table. If a +//! load accesses a location identified by a (last store, address, +//! type) key already in the table, we replace it with the SSA value +//! for that memory location. This is usually known as "redundant load +//! elimination" if the value came from an earlier load of the same +//! location, or "store-to-load forwarding" if the value came from an +//! earlier store to the same location. +//! +//! In theory we could also do *dead-store elimination*, where if a +//! store overwrites a key in the table, *and* if no other load/store +//! to the abstract state category occurred, *and* no other trapping +//! instruction occurred (at which point we need an up-to-date memory +//! state because post-trap-termination memory state can be observed), +//! *and* we can prove the original store could not have trapped, then +//! we can eliminate the original store. Because this is so complex, +//! and the conditions for doing it correctly when post-trap state +//! must be correct likely reduce the potential benefit, we don't yet +//! do this. + +use crate::{ + cursor::{Cursor, FuncCursor}, + dominator_tree::DominatorTree, + inst_predicates::{ + has_memory_fence_semantics, inst_addr_offset_type, inst_store_data, visit_block_succs, + }, + ir::{immediates::Offset32, AliasRegion, Block, Function, Inst, Opcode, Type, Value}, + trace, +}; +use cranelift_entity::{packed_option::PackedOption, EntityRef}; +use rustc_hash::{FxHashMap, FxHashSet}; + +/// For a given program point, the vector of last-store instruction +/// indices for each disjoint category of abstract state. 
+#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)] +pub struct LastStores { + heap: PackedOption, + table: PackedOption, + vmctx: PackedOption, + other: PackedOption, +} + +impl LastStores { + fn update(&mut self, func: &Function, inst: Inst) { + let opcode = func.dfg.insts[inst].opcode(); + if has_memory_fence_semantics(opcode) { + self.heap = inst.into(); + self.table = inst.into(); + self.vmctx = inst.into(); + self.other = inst.into(); + } else if opcode.can_store() { + if let Some(memflags) = func.dfg.insts[inst].memflags() { + match memflags.alias_region() { + None => self.other = inst.into(), + Some(AliasRegion::Heap) => self.heap = inst.into(), + Some(AliasRegion::Table) => self.table = inst.into(), + Some(AliasRegion::Vmctx) => self.vmctx = inst.into(), + } + } else { + self.heap = inst.into(); + self.table = inst.into(); + self.vmctx = inst.into(); + self.other = inst.into(); + } + } + } + + fn get_last_store(&self, func: &Function, inst: Inst) -> PackedOption { + if let Some(memflags) = func.dfg.insts[inst].memflags() { + match memflags.alias_region() { + None => self.other, + Some(AliasRegion::Heap) => self.heap, + Some(AliasRegion::Table) => self.table, + Some(AliasRegion::Vmctx) => self.vmctx, + } + } else if func.dfg.insts[inst].opcode().can_load() + || func.dfg.insts[inst].opcode().can_store() + { + inst.into() + } else { + PackedOption::default() + } + } + + fn meet_from(&mut self, other: &LastStores, loc: Inst) { + let meet = |a: PackedOption, b: PackedOption| -> PackedOption { + match (a.into(), b.into()) { + (None, None) => None.into(), + (Some(a), None) => a, + (None, Some(b)) => b, + (Some(a), Some(b)) if a == b => a, + _ => loc.into(), + } + }; + + self.heap = meet(self.heap, other.heap); + self.table = meet(self.table, other.table); + self.vmctx = meet(self.vmctx, other.vmctx); + self.other = meet(self.other, other.other); + } +} + +/// A key identifying a unique memory location. +/// +/// For the result of a load to be equivalent to the result of another +/// load, or the store data from a store, we need for (i) the +/// "version" of memory (here ensured by having the same last store +/// instruction to touch the disjoint category of abstract state we're +/// accessing); (ii) the address must be the same (here ensured by +/// having the same SSA value, which doesn't change after computed); +/// (iii) the offset must be the same; and (iv) the accessed type and +/// extension mode (e.g., 8-to-32, signed) must be the same. +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +struct MemoryLoc { + last_store: PackedOption, + address: Value, + offset: Offset32, + ty: Type, + /// We keep the *opcode* of the instruction that produced the + /// value we record at this key if the opcode is anything other + /// than an ordinary load or store. This is needed when we + /// consider loads that extend the value: e.g., an 8-to-32 + /// sign-extending load will produce a 32-bit value from an 8-bit + /// value in memory, so we can only reuse that (as part of RLE) + /// for another load with the same extending opcode. + /// + /// We could improve the transform to insert explicit extend ops + /// in place of extending loads when we know the memory value, but + /// we haven't yet done this. + extending_opcode: Option, +} + +/// An alias-analysis pass. +pub struct AliasAnalysis<'a> { + /// The domtree for the function. + domtree: &'a DominatorTree, + + /// Input state to a basic block. + block_input: FxHashMap, + + /// Known memory-value equivalences. 
This is the result of the + /// analysis. This is a mapping from (last store, address + /// expression, offset, type) to SSA `Value`. + /// + /// We keep the defining inst around for quick dominance checks. + mem_values: FxHashMap, +} + +impl<'a> AliasAnalysis<'a> { + /// Perform an alias analysis pass. + pub fn new(func: &Function, domtree: &'a DominatorTree) -> AliasAnalysis<'a> { + trace!("alias analysis: input is:\n{:?}", func); + let mut analysis = AliasAnalysis { + domtree, + block_input: FxHashMap::default(), + mem_values: FxHashMap::default(), + }; + + analysis.compute_block_input_states(func); + analysis + } + + fn compute_block_input_states(&mut self, func: &Function) { + let mut queue = vec![]; + let mut queue_set = FxHashSet::default(); + let entry = func.layout.entry_block().unwrap(); + queue.push(entry); + queue_set.insert(entry); + + while let Some(block) = queue.pop() { + queue_set.remove(&block); + let mut state = *self + .block_input + .entry(block) + .or_insert_with(|| LastStores::default()); + + trace!( + "alias analysis: input to block{} is {:?}", + block.index(), + state + ); + + for inst in func.layout.block_insts(block) { + state.update(func, inst); + trace!("after inst{}: state is {:?}", inst.index(), state); + } + + visit_block_succs(func, block, |_inst, succ, _from_table| { + let succ_first_inst = func.layout.block_insts(succ).into_iter().next().unwrap(); + let updated = match self.block_input.get_mut(&succ) { + Some(succ_state) => { + let old = *succ_state; + succ_state.meet_from(&state, succ_first_inst); + *succ_state != old + } + None => { + self.block_input.insert(succ, state); + true + } + }; + + if updated && queue_set.insert(succ) { + queue.push(succ); + } + }); + } + } + + /// Get the starting state for a block. + pub fn block_starting_state(&self, block: Block) -> LastStores { + self.block_input + .get(&block) + .cloned() + .unwrap_or_else(|| LastStores::default()) + } + + /// Process one instruction. Meant to be invoked in program order + /// within a block, and ideally in RPO or at least some domtree + /// preorder for maximal reuse. + /// + /// Returns `true` if instruction was removed. 
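+    ///
+    /// A rough driver sketch, mirroring the loop in
+    /// `compute_and_update_aliases` below (not a checked doctest; the
+    /// analysis state is crate-internal):
+    ///
+    /// ```ignore
+    /// let mut pos = FuncCursor::new(func);
+    /// while let Some(block) = pos.next_block() {
+    ///     let mut state = analysis.block_starting_state(block);
+    ///     while let Some(inst) = pos.next_inst() {
+    ///         if let Some(replacement) = analysis.process_inst(pos.func, &mut state, inst) {
+    ///             // `inst` is a redundant load whose result can be aliased
+    ///             // to `replacement`.
+    ///         }
+    ///     }
+    /// }
+    /// ```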
+ pub fn process_inst( + &mut self, + func: &mut Function, + state: &mut LastStores, + inst: Inst, + ) -> Option { + trace!( + "alias analysis: scanning at inst{} with state {:?} ({:?})", + inst.index(), + state, + func.dfg.insts[inst], + ); + + let replacing_value = if let Some((address, offset, ty)) = inst_addr_offset_type(func, inst) + { + let address = func.dfg.resolve_aliases(address); + let opcode = func.dfg.insts[inst].opcode(); + + if opcode.can_store() { + let store_data = inst_store_data(func, inst).unwrap(); + let store_data = func.dfg.resolve_aliases(store_data); + let mem_loc = MemoryLoc { + last_store: inst.into(), + address, + offset, + ty, + extending_opcode: get_ext_opcode(opcode), + }; + trace!( + "alias analysis: at inst{}: store with data v{} at loc {:?}", + inst.index(), + store_data.index(), + mem_loc + ); + self.mem_values.insert(mem_loc, (inst, store_data)); + + None + } else if opcode.can_load() { + let last_store = state.get_last_store(func, inst); + let load_result = func.dfg.inst_results(inst)[0]; + let mem_loc = MemoryLoc { + last_store, + address, + offset, + ty, + extending_opcode: get_ext_opcode(opcode), + }; + trace!( + "alias analysis: at inst{}: load with last_store inst{} at loc {:?}", + inst.index(), + last_store.map(|inst| inst.index()).unwrap_or(usize::MAX), + mem_loc + ); + + // Is there a Value already known to be stored + // at this specific memory location? If so, + // we can alias the load result to this + // already-known Value. + // + // Check if the definition dominates this + // location; it might not, if it comes from a + // load (stores will always dominate though if + // their `last_store` survives through + // meet-points to this use-site). + let aliased = + if let Some((def_inst, value)) = self.mem_values.get(&mem_loc).cloned() { + trace!( + " -> sees known value v{} from inst{}", + value.index(), + def_inst.index() + ); + if self.domtree.dominates(def_inst, inst, &func.layout) { + trace!( + " -> dominates; value equiv from v{} to v{} inserted", + load_result.index(), + value.index() + ); + Some(value) + } else { + None + } + } else { + None + }; + + // Otherwise, we can keep *this* load around + // as a new equivalent value. + if aliased.is_none() { + trace!( + " -> inserting load result v{} at loc {:?}", + load_result.index(), + mem_loc + ); + self.mem_values.insert(mem_loc, (inst, load_result)); + } + + aliased + } else { + None + } + } else { + None + }; + + state.update(func, inst); + + replacing_value + } + + /// Make a pass and update known-redundant loads to aliased + /// values. We interleave the updates with the memory-location + /// tracking because resolving some aliases may expose others + /// (e.g. in cases of double-indirection with two separate chains + /// of loads). 
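+    ///
+    /// Typical entry point, as used by `Context::replace_redundant_loads`
+    /// (sketch; assumes `domtree` has already been computed for `func`):
+    ///
+    /// ```ignore
+    /// let mut analysis = AliasAnalysis::new(&func, &domtree);
+    /// analysis.compute_and_update_aliases(&mut func);
+    /// ```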
+ pub fn compute_and_update_aliases(&mut self, func: &mut Function) { + let mut pos = FuncCursor::new(func); + + while let Some(block) = pos.next_block() { + let mut state = self.block_starting_state(block); + while let Some(inst) = pos.next_inst() { + if let Some(replaced_result) = self.process_inst(pos.func, &mut state, inst) { + let result = pos.func.dfg.inst_results(inst)[0]; + pos.func.dfg.clear_results(inst); + pos.func.dfg.change_to_alias(result, replaced_result); + pos.remove_inst_and_step_back(); + } + } + } + } +} + +fn get_ext_opcode(op: Opcode) -> Option { + debug_assert!(op.can_load() || op.can_store()); + match op { + Opcode::Load | Opcode::Store => None, + _ => Some(op), + } +} diff --git a/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/binemit/mod.rs b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/binemit/mod.rs new file mode 100644 index 000000000..39490849a --- /dev/null +++ b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/binemit/mod.rs @@ -0,0 +1,174 @@ +//! Binary machine code emission. +//! +//! The `binemit` module contains code for translating Cranelift's intermediate representation into +//! binary machine code. + +use core::fmt; +#[cfg(feature = "enable-serde")] +use serde_derive::{Deserialize, Serialize}; + +/// Offset in bytes from the beginning of the function. +/// +/// Cranelift can be used as a cross compiler, so we don't want to use a type like `usize` which +/// depends on the *host* platform, not the *target* platform. +pub type CodeOffset = u32; + +/// Addend to add to the symbol value. +pub type Addend = i64; + +/// Relocation kinds for every ISA +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub enum Reloc { + /// absolute 4-byte + Abs4, + /// absolute 8-byte + Abs8, + /// x86 PC-relative 4-byte + X86PCRel4, + /// x86 call to PC-relative 4-byte + X86CallPCRel4, + /// x86 call to PLT-relative 4-byte + X86CallPLTRel4, + /// x86 GOT PC-relative 4-byte + X86GOTPCRel4, + /// The 32-bit offset of the target from the beginning of its section. + /// Equivalent to `IMAGE_REL_AMD64_SECREL`. + /// See: [PE Format](https://docs.microsoft.com/en-us/windows/win32/debug/pe-format) + X86SecRel, + /// Arm32 call target + Arm32Call, + /// Arm64 call target. Encoded as bottom 26 bits of instruction. This + /// value is sign-extended, multiplied by 4, and added to the PC of + /// the call instruction to form the destination address. + Arm64Call, + /// s390x PC-relative 4-byte offset + S390xPCRel32Dbl, + /// s390x PC-relative 4-byte offset to PLT + S390xPLTRel32Dbl, + + /// Elf x86_64 32 bit signed PC relative offset to two GOT entries for GD symbol. + ElfX86_64TlsGd, + + /// Mach-O x86_64 32 bit signed PC relative offset to a `__thread_vars` entry. + MachOX86_64Tlv, + + /// Mach-O Aarch64 TLS + /// PC-relative distance to the page of the TLVP slot. + MachOAarch64TlsAdrPage21, + + /// Mach-O Aarch64 TLS + /// Offset within page of TLVP slot. 
+ MachOAarch64TlsAdrPageOff12, + + /// Aarch64 TLSDESC Adr Page21 + /// This is equivalent to `R_AARCH64_TLSDESC_ADR_PAGE21` in the [aaelf64](https://github.com/ARM-software/abi-aa/blob/2bcab1e3b22d55170c563c3c7940134089176746/aaelf64/aaelf64.rst#57105thread-local-storage-descriptors) + Aarch64TlsDescAdrPage21, + + /// Aarch64 TLSDESC Ld64 Lo12 + /// This is equivalent to `R_AARCH64_TLSDESC_LD64_LO12` in the [aaelf64](https://github.com/ARM-software/abi-aa/blob/2bcab1e3b22d55170c563c3c7940134089176746/aaelf64/aaelf64.rst#57105thread-local-storage-descriptors) + Aarch64TlsDescLd64Lo12, + + /// Aarch64 TLSDESC Add Lo12 + /// This is equivalent to `R_AARCH64_TLSGD_ADD_LO12` in the [aaelf64](https://github.com/ARM-software/abi-aa/blob/2bcab1e3b22d55170c563c3c7940134089176746/aaelf64/aaelf64.rst#57105thread-local-storage-descriptors) + Aarch64TlsDescAddLo12, + + /// Aarch64 TLSDESC Call + /// This is equivalent to `R_AARCH64_TLSDESC_CALL` in the [aaelf64](https://github.com/ARM-software/abi-aa/blob/2bcab1e3b22d55170c563c3c7940134089176746/aaelf64/aaelf64.rst#57105thread-local-storage-descriptors) + Aarch64TlsDescCall, + + /// AArch64 GOT Page + /// Set the immediate value of an ADRP to bits 32:12 of X; check that –2^32 <= X < 2^32 + /// This is equivalent to `R_AARCH64_ADR_GOT_PAGE` (311) in the [aaelf64](https://github.com/ARM-software/abi-aa/blob/2bcab1e3b22d55170c563c3c7940134089176746/aaelf64/aaelf64.rst#static-aarch64-relocations) + Aarch64AdrGotPage21, + + /// AArch64 GOT Low bits + + /// Set the LD/ST immediate field to bits 11:3 of X. No overflow check; check that X&7 = 0 + /// This is equivalent to `R_AARCH64_LD64_GOT_LO12_NC` (312) in the [aaelf64](https://github.com/ARM-software/abi-aa/blob/2bcab1e3b22d55170c563c3c7940134089176746/aaelf64/aaelf64.rst#static-aarch64-relocations) + Aarch64Ld64GotLo12Nc, + + /// RISC-V Call PLT: 32-bit PC-relative function call, macros call, tail (PIC) + /// + /// Despite having PLT in the name, this relocation is also used for normal calls. + /// The non-PLT version of this relocation has been deprecated. + /// + /// This is the `R_RISCV_CALL_PLT` relocation from the RISC-V ELF psABI document. + /// + RiscvCallPlt, + + /// RISC-V TLS GD: High 20 bits of 32-bit PC-relative TLS GD GOT reference, + /// + /// This is the `R_RISCV_TLS_GD_HI20` relocation from the RISC-V ELF psABI document. + /// + RiscvTlsGdHi20, + + /// Low 12 bits of a 32-bit PC-relative relocation (I-Type instruction) + /// + /// This is the `R_RISCV_PCREL_LO12_I` relocation from the RISC-V ELF psABI document. + /// + RiscvPCRelLo12I, + + /// High 20 bits of a 32-bit PC-relative GOT offset relocation + /// + /// This is the `R_RISCV_GOT_HI20` relocation from the RISC-V ELF psABI document. + /// + RiscvGotHi20, + + /// s390x TLS GD64 - 64-bit offset of tls_index for GD symbol in GOT + S390xTlsGd64, + /// s390x TLS GDCall - marker to enable optimization of TLS calls + S390xTlsGdCall, + + /// Pulley - call a host function indirectly where the embedder resolving + /// this relocation needs to fill the 8-bit immediate that's part of the + /// `call_indirect_host` opcode (an opaque identifier used by the host). + PulleyCallIndirectHost, +} + +impl fmt::Display for Reloc { + /// Display trait implementation drops the arch, since its used in contexts where the arch is + /// already unambiguous, e.g. clif syntax with isa specified. In other contexts, use Debug. 
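+    ///
+    /// A small illustration of the difference (sketch, not a checked doctest):
+    ///
+    /// ```ignore
+    /// assert_eq!(Reloc::X86CallPCRel4.to_string(), "CallPCRel4");
+    /// assert_eq!(format!("{:?}", Reloc::X86CallPCRel4), "X86CallPCRel4");
+    /// ```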
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match *self { + Self::Abs4 => write!(f, "Abs4"), + Self::Abs8 => write!(f, "Abs8"), + Self::S390xPCRel32Dbl => write!(f, "PCRel32Dbl"), + Self::S390xPLTRel32Dbl => write!(f, "PLTRel32Dbl"), + Self::X86PCRel4 => write!(f, "PCRel4"), + Self::X86CallPCRel4 => write!(f, "CallPCRel4"), + Self::X86CallPLTRel4 => write!(f, "CallPLTRel4"), + Self::X86GOTPCRel4 => write!(f, "GOTPCRel4"), + Self::X86SecRel => write!(f, "SecRel"), + Self::Arm32Call | Self::Arm64Call => write!(f, "Call"), + Self::RiscvCallPlt => write!(f, "RiscvCallPlt"), + Self::RiscvTlsGdHi20 => write!(f, "RiscvTlsGdHi20"), + Self::RiscvGotHi20 => write!(f, "RiscvGotHi20"), + Self::RiscvPCRelLo12I => write!(f, "RiscvPCRelLo12I"), + Self::ElfX86_64TlsGd => write!(f, "ElfX86_64TlsGd"), + Self::MachOX86_64Tlv => write!(f, "MachOX86_64Tlv"), + Self::MachOAarch64TlsAdrPage21 => write!(f, "MachOAarch64TlsAdrPage21"), + Self::MachOAarch64TlsAdrPageOff12 => write!(f, "MachOAarch64TlsAdrPageOff12"), + Self::Aarch64TlsDescAdrPage21 => write!(f, "Aarch64TlsDescAdrPage21"), + Self::Aarch64TlsDescLd64Lo12 => write!(f, "Aarch64TlsDescLd64Lo12"), + Self::Aarch64TlsDescAddLo12 => write!(f, "Aarch64TlsDescAddLo12"), + Self::Aarch64TlsDescCall => write!(f, "Aarch64TlsDescCall"), + Self::Aarch64AdrGotPage21 => write!(f, "Aarch64AdrGotPage21"), + Self::Aarch64Ld64GotLo12Nc => write!(f, "Aarch64AdrGotLo12Nc"), + Self::S390xTlsGd64 => write!(f, "TlsGd64"), + Self::S390xTlsGdCall => write!(f, "TlsGdCall"), + Self::PulleyCallIndirectHost => write!(f, "PulleyCallIndirectHost"), + } + } +} + +/// Container for information about a vector of compiled code and its supporting read-only data. +/// +/// The code starts at offset 0 and is followed optionally by relocatable jump tables and copyable +/// (raw binary) read-only data. Any padding between sections is always part of the section that +/// precedes the boundary between the sections. +#[derive(Debug, PartialEq)] +pub struct CodeInfo { + /// Number of bytes in total. + pub total_size: CodeOffset, +} diff --git a/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/cfg_printer.rs b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/cfg_printer.rs new file mode 100644 index 000000000..ef71b63ca --- /dev/null +++ b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/cfg_printer.rs @@ -0,0 +1,83 @@ +//! The `CFGPrinter` utility. + +use alloc::vec::Vec; +use core::fmt::{Display, Formatter, Result, Write}; + +use crate::entity::SecondaryMap; +use crate::flowgraph::{BlockPredecessor, ControlFlowGraph}; +use crate::ir::Function; +use crate::write::{FuncWriter, PlainWriter}; + +/// A utility for pretty-printing the CFG of a `Function`. +pub struct CFGPrinter<'a> { + func: &'a Function, + cfg: ControlFlowGraph, +} + +/// A utility for pretty-printing the CFG of a `Function`. +impl<'a> CFGPrinter<'a> { + /// Create a new CFGPrinter. + pub fn new(func: &'a Function) -> Self { + Self { + func, + cfg: ControlFlowGraph::with_function(func), + } + } + + /// Write the CFG for this function to `w`. 
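+    ///
+    /// Since `CFGPrinter` also implements `Display`, the dot output can be
+    /// obtained directly as a `String` (sketch, assuming a built `Function`):
+    ///
+    /// ```ignore
+    /// let dot = CFGPrinter::new(&func).to_string();
+    /// println!("{dot}");
+    /// ```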
+ pub fn write(&self, w: &mut dyn Write) -> Result { + self.header(w)?; + self.block_nodes(w)?; + self.cfg_connections(w)?; + writeln!(w, "}}") + } + + fn header(&self, w: &mut dyn Write) -> Result { + writeln!(w, "digraph \"{}\" {{", self.func.name)?; + if let Some(entry) = self.func.layout.entry_block() { + writeln!(w, " {{rank=min; {entry}}}")?; + } + Ok(()) + } + + fn block_nodes(&self, w: &mut dyn Write) -> Result { + let mut aliases = SecondaryMap::<_, Vec<_>>::new(); + for v in self.func.dfg.values() { + // VADFS returns the immediate target of an alias + if let Some(k) = self.func.dfg.value_alias_dest_for_serialization(v) { + aliases[k].push(v); + } + } + + for block in &self.func.layout { + write!(w, " {block} [shape=record, label=\"{{")?; + crate::write::write_block_header(w, self.func, block, 4)?; + // Add all outgoing branch instructions to the label. + if let Some(inst) = self.func.layout.last_inst(block) { + write!(w, " | <{inst}>")?; + PlainWriter.write_instruction(w, self.func, &aliases, inst, 0)?; + } + writeln!(w, "}}\"]")? + } + Ok(()) + } + + fn cfg_connections(&self, w: &mut dyn Write) -> Result { + for block in &self.func.layout { + for BlockPredecessor { + block: parent, + inst, + } in self.cfg.pred_iter(block) + { + writeln!(w, " {parent}:{inst} -> {block}")?; + } + } + Ok(()) + } +} + +impl<'a> Display for CFGPrinter<'a> { + fn fmt(&self, f: &mut Formatter) -> Result { + self.write(f) + } +} diff --git a/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/constant_hash.rs b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/constant_hash.rs new file mode 100644 index 000000000..1de2a2edb --- /dev/null +++ b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/constant_hash.rs @@ -0,0 +1,62 @@ +//! Runtime support for precomputed constant hash tables. +//! +//! The shared module with the same name can generate constant hash tables using open addressing +//! and quadratic probing. +//! +//! The hash tables are arrays that are guaranteed to: +//! +//! - Have a power-of-two size. +//! - Contain at least one empty slot. +//! +//! This module provides runtime support for lookups in these tables. + +// Re-export entities from constant_hash for simplicity of use. +pub use cranelift_codegen_shared::constant_hash::*; + +/// Trait that must be implemented by the entries in a constant hash table. +pub trait Table { + /// Get the number of entries in this table which must be a power of two. + fn len(&self) -> usize; + + /// Get the key corresponding to the entry at `idx`, or `None` if the entry is empty. + /// The `idx` must be in range. + fn key(&self, idx: usize) -> Option; +} + +/// Look for `key` in `table`. +/// +/// The provided `hash` value must have been computed from `key` using the same hash function that +/// was used to construct the table. +/// +/// Returns `Ok(idx)` with the table index containing the found entry, or `Err(idx)` with the empty +/// sentinel entry if no entry could be found. +pub fn probe + ?Sized>( + table: &T, + key: K, + hash: usize, +) -> Result { + debug_assert!(table.len().is_power_of_two()); + let mask = table.len() - 1; + + let mut idx = hash; + let mut step = 0; + + loop { + idx &= mask; + + match table.key(idx) { + None => return Err(idx), + Some(k) if k == key => return Ok(idx), + _ => {} + } + + // Quadratic probing. + step += 1; + + // When `table.len()` is a power of two, it can be proven that `idx` will visit all + // entries. 
This means that this loop will always terminate if the hash table has even + // one unused entry. + debug_assert!(step < table.len()); + idx += step; + } +} diff --git a/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/context.rs b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/context.rs new file mode 100644 index 000000000..b52d82a03 --- /dev/null +++ b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/context.rs @@ -0,0 +1,380 @@ +//! Cranelift compilation context and main entry point. +//! +//! When compiling many small functions, it is important to avoid repeatedly allocating and +//! deallocating the data structures needed for compilation. The `Context` struct is used to hold +//! on to memory allocations between function compilations. +//! +//! The context does not hold a `TargetIsa` instance which has to be provided as an argument +//! instead. This is because an ISA instance is immutable and can be used by multiple compilation +//! contexts concurrently. Typically, you would have one context per compilation thread and only a +//! single ISA instance. + +use crate::alias_analysis::AliasAnalysis; +use crate::dominator_tree::DominatorTree; +use crate::egraph::EgraphPass; +use crate::flowgraph::ControlFlowGraph; +use crate::ir::Function; +use crate::isa::TargetIsa; +use crate::legalizer::simple_legalize; +use crate::loop_analysis::LoopAnalysis; +use crate::machinst::{CompiledCode, CompiledCodeStencil}; +use crate::nan_canonicalization::do_nan_canonicalization; +use crate::remove_constant_phis::do_remove_constant_phis; +use crate::result::{CodegenResult, CompileResult}; +use crate::settings::{FlagsOrIsa, OptLevel}; +use crate::trace; +use crate::unreachable_code::eliminate_unreachable_code; +use crate::verifier::{verify_context, VerifierErrors, VerifierResult}; +use crate::{timing, CompileError}; +#[cfg(feature = "souper-harvest")] +use alloc::string::String; +use alloc::vec::Vec; +use cranelift_control::ControlPlane; +use target_lexicon::Architecture; + +#[cfg(feature = "souper-harvest")] +use crate::souper_harvest::do_souper_harvest; + +/// Persistent data structures and compilation pipeline. +pub struct Context { + /// The function we're compiling. + pub func: Function, + + /// The control flow graph of `func`. + pub cfg: ControlFlowGraph, + + /// Dominator tree for `func`. + pub domtree: DominatorTree, + + /// Loop analysis of `func`. + pub loop_analysis: LoopAnalysis, + + /// Result of MachBackend compilation, if computed. + pub(crate) compiled_code: Option, + + /// Flag: do we want a disassembly with the CompiledCode? + pub want_disasm: bool, +} + +impl Context { + /// Allocate a new compilation context. + /// + /// The returned instance should be reused for compiling multiple functions in order to avoid + /// needless allocator thrashing. + pub fn new() -> Self { + Self::for_function(Function::new()) + } + + /// Allocate a new compilation context with an existing Function. + /// + /// The returned instance should be reused for compiling multiple functions in order to avoid + /// needless allocator thrashing. + pub fn for_function(func: Function) -> Self { + Self { + func, + cfg: ControlFlowGraph::new(), + domtree: DominatorTree::new(), + loop_analysis: LoopAnalysis::new(), + compiled_code: None, + want_disasm: false, + } + } + + /// Clear all data structures in this context. 
+ pub fn clear(&mut self) { + self.func.clear(); + self.cfg.clear(); + self.domtree.clear(); + self.loop_analysis.clear(); + self.compiled_code = None; + self.want_disasm = false; + } + + /// Returns the compilation result for this function, available after any `compile` function + /// has been called. + pub fn compiled_code(&self) -> Option<&CompiledCode> { + self.compiled_code.as_ref() + } + + /// Returns the compilation result for this function, available after any `compile` function + /// has been called. + pub fn take_compiled_code(&mut self) -> Option { + self.compiled_code.take() + } + + /// Set the flag to request a disassembly when compiling with a + /// `MachBackend` backend. + pub fn set_disasm(&mut self, val: bool) { + self.want_disasm = val; + } + + /// Compile the function, and emit machine code into a `Vec`. + #[deprecated = "use Context::compile"] + pub fn compile_and_emit( + &mut self, + isa: &dyn TargetIsa, + mem: &mut Vec, + ctrl_plane: &mut ControlPlane, + ) -> CompileResult<&CompiledCode> { + let compiled_code = self.compile(isa, ctrl_plane)?; + mem.extend_from_slice(compiled_code.code_buffer()); + Ok(compiled_code) + } + + /// Internally compiles the function into a stencil. + /// + /// Public only for testing and fuzzing purposes. + pub fn compile_stencil( + &mut self, + isa: &dyn TargetIsa, + ctrl_plane: &mut ControlPlane, + ) -> CodegenResult { + let _tt = timing::compile(); + + self.verify_if(isa)?; + + self.optimize(isa, ctrl_plane)?; + + isa.compile_function(&self.func, &self.domtree, self.want_disasm, ctrl_plane) + } + + /// Optimize the function, performing all compilation steps up to + /// but not including machine-code lowering and register + /// allocation. + /// + /// Public only for testing purposes. + pub fn optimize( + &mut self, + isa: &dyn TargetIsa, + ctrl_plane: &mut ControlPlane, + ) -> CodegenResult<()> { + log::debug!( + "Number of CLIF instructions to optimize: {}", + self.func.dfg.num_insts() + ); + log::debug!( + "Number of CLIF blocks to optimize: {}", + self.func.dfg.num_blocks() + ); + + let opt_level = isa.flags().opt_level(); + crate::trace!( + "Optimizing (opt level {:?}):\n{}", + opt_level, + self.func.display() + ); + + self.compute_cfg(); + if isa.flags().enable_nan_canonicalization() { + self.canonicalize_nans(isa)?; + } + + self.legalize(isa)?; + + self.compute_domtree(); + self.eliminate_unreachable_code(isa)?; + self.remove_constant_phis(isa)?; + + self.func.dfg.resolve_all_aliases(); + + if opt_level != OptLevel::None { + self.egraph_pass(isa, ctrl_plane)?; + } + + Ok(()) + } + + /// Compile the function, + /// + /// Run the function through all the passes necessary to generate + /// code for the target ISA represented by `isa`. The generated + /// machine code is not relocated. Instead, any relocations can be + /// obtained from `compiled_code.buffer.relocs()`. + /// + /// Performs any optimizations that are enabled, unless + /// `optimize()` was already invoked. + /// + /// Returns the generated machine code as well as information about + /// the function's code and read-only data. 
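+    ///
+    /// Sketch of a call site (error handling elided; assumes a default
+    /// `ControlPlane` is acceptable for the embedding):
+    ///
+    /// ```ignore
+    /// let compiled = ctx.compile(isa, &mut ControlPlane::default())?;
+    /// let machine_code: &[u8] = compiled.code_buffer();
+    /// ```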
+ pub fn compile( + &mut self, + isa: &dyn TargetIsa, + ctrl_plane: &mut ControlPlane, + ) -> CompileResult<&CompiledCode> { + let stencil = self + .compile_stencil(isa, ctrl_plane) + .map_err(|error| CompileError { + inner: error, + func: &self.func, + })?; + Ok(self + .compiled_code + .insert(stencil.apply_params(&self.func.params))) + } + + /// If available, return information about the code layout in the + /// final machine code: the offsets (in bytes) of each basic-block + /// start, and all basic-block edges. + #[deprecated = "use CompiledCode::get_code_bb_layout"] + pub fn get_code_bb_layout(&self) -> Option<(Vec, Vec<(usize, usize)>)> { + self.compiled_code().map(CompiledCode::get_code_bb_layout) + } + + /// Creates unwind information for the function. + /// + /// Returns `None` if the function has no unwind information. + #[cfg(feature = "unwind")] + #[deprecated = "use CompiledCode::create_unwind_info"] + pub fn create_unwind_info( + &self, + isa: &dyn TargetIsa, + ) -> CodegenResult> { + self.compiled_code().unwrap().create_unwind_info(isa) + } + + /// Run the verifier on the function. + /// + /// Also check that the dominator tree and control flow graph are consistent with the function. + /// + /// TODO: rename to "CLIF validate" or similar. + pub fn verify<'a, FOI: Into>>(&self, fisa: FOI) -> VerifierResult<()> { + let mut errors = VerifierErrors::default(); + let _ = verify_context(&self.func, &self.cfg, &self.domtree, fisa, &mut errors); + + if errors.is_empty() { + Ok(()) + } else { + Err(errors) + } + } + + /// Run the verifier only if the `enable_verifier` setting is true. + pub fn verify_if<'a, FOI: Into>>(&self, fisa: FOI) -> CodegenResult<()> { + let fisa = fisa.into(); + if fisa.flags.enable_verifier() { + self.verify(fisa)?; + } + Ok(()) + } + + /// Perform constant-phi removal on the function. + pub fn remove_constant_phis<'a, FOI: Into>>( + &mut self, + fisa: FOI, + ) -> CodegenResult<()> { + do_remove_constant_phis(&mut self.func, &mut self.domtree); + self.verify_if(fisa)?; + Ok(()) + } + + /// Perform NaN canonicalizing rewrites on the function. + pub fn canonicalize_nans(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> { + // Currently only RiscV64 is the only arch that may not have vector support. + let has_vector_support = match isa.triple().architecture { + Architecture::Riscv64(_) => match isa.isa_flags().iter().find(|f| f.name == "has_v") { + Some(value) => value.as_bool().unwrap_or(false), + None => false, + }, + _ => true, + }; + do_nan_canonicalization(&mut self.func, has_vector_support); + self.verify_if(isa) + } + + /// Run the legalizer for `isa` on the function. + pub fn legalize(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> { + // Legalization invalidates the domtree and loop_analysis by mutating the CFG. + // TODO: Avoid doing this when legalization doesn't actually mutate the CFG. + self.domtree.clear(); + self.loop_analysis.clear(); + + // Run some specific legalizations only. + simple_legalize(&mut self.func, isa); + self.verify_if(isa) + } + + /// Compute the control flow graph. + pub fn compute_cfg(&mut self) { + self.cfg.compute(&self.func) + } + + /// Compute dominator tree. + pub fn compute_domtree(&mut self) { + self.domtree.compute(&self.func, &self.cfg) + } + + /// Compute the loop analysis. + pub fn compute_loop_analysis(&mut self) { + self.loop_analysis + .compute(&self.func, &self.cfg, &self.domtree) + } + + /// Compute the control flow graph and dominator tree. 
+ pub fn flowgraph(&mut self) { + self.compute_cfg(); + self.compute_domtree() + } + + /// Perform unreachable code elimination. + pub fn eliminate_unreachable_code<'a, FOI>(&mut self, fisa: FOI) -> CodegenResult<()> + where + FOI: Into>, + { + eliminate_unreachable_code(&mut self.func, &mut self.cfg, &self.domtree); + self.verify_if(fisa) + } + + /// Replace all redundant loads with the known values in + /// memory. These are loads whose values were already loaded by + /// other loads earlier, as well as loads whose values were stored + /// by a store instruction to the same instruction (so-called + /// "store-to-load forwarding"). + pub fn replace_redundant_loads(&mut self) -> CodegenResult<()> { + let mut analysis = AliasAnalysis::new(&self.func, &self.domtree); + analysis.compute_and_update_aliases(&mut self.func); + Ok(()) + } + + /// Harvest candidate left-hand sides for superoptimization with Souper. + #[cfg(feature = "souper-harvest")] + pub fn souper_harvest( + &mut self, + out: &mut std::sync::mpsc::Sender, + ) -> CodegenResult<()> { + do_souper_harvest(&self.func, out); + Ok(()) + } + + /// Run optimizations via the egraph infrastructure. + pub fn egraph_pass<'a, FOI>( + &mut self, + fisa: FOI, + ctrl_plane: &mut ControlPlane, + ) -> CodegenResult<()> + where + FOI: Into>, + { + let _tt = timing::egraph(); + + trace!( + "About to optimize with egraph phase:\n{}", + self.func.display() + ); + let fisa = fisa.into(); + self.compute_loop_analysis(); + let mut alias_analysis = AliasAnalysis::new(&self.func, &self.domtree); + let mut pass = EgraphPass::new( + &mut self.func, + &self.domtree, + &self.loop_analysis, + &mut alias_analysis, + &fisa.flags, + ctrl_plane, + ); + pass.run(); + log::debug!("egraph stats: {:?}", pass.stats); + trace!("After egraph optimization:\n{}", self.func.display()); + + self.verify_if(fisa) + } +} diff --git a/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ctxhash.rs b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ctxhash.rs new file mode 100644 index 000000000..98931566a --- /dev/null +++ b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ctxhash.rs @@ -0,0 +1,224 @@ +//! A hashmap with "external hashing": nodes are hashed or compared for +//! equality only with some external context provided on lookup/insert. +//! This allows very memory-efficient data structures where +//! node-internal data references some other storage (e.g., offsets into +//! an array or pool of shared data). + +use hashbrown::hash_table::HashTable; +use std::hash::{Hash, Hasher}; + +/// Trait that allows for equality comparison given some external +/// context. +/// +/// Note that this trait is implemented by the *context*, rather than +/// the item type, for somewhat complex lifetime reasons (lack of GATs +/// to allow `for<'ctx> Ctx<'ctx>`-like associated types in traits on +/// the value type). +pub trait CtxEq { + /// Determine whether `a` and `b` are equal, given the context in + /// `self` and the union-find data structure `uf`. + fn ctx_eq(&self, a: &V1, b: &V2) -> bool; +} + +/// Trait that allows for hashing given some external context. +pub trait CtxHash: CtxEq { + /// Compute the hash of `value`, given the context in `self` and + /// the union-find data structure `uf`. + fn ctx_hash(&self, state: &mut H, value: &Value); +} + +/// A null-comparator context type for underlying value types that +/// already have `Eq` and `Hash`. 
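+///
+/// With `NullCtx`, a `CtxHashMap` behaves like an ordinary hash map (sketch;
+/// key/value types chosen purely for illustration):
+///
+/// ```ignore
+/// let mut map: CtxHashMap<u32, &str> = CtxHashMap::with_capacity(8);
+/// assert_eq!(map.insert(1, "one", &NullCtx), None);
+/// assert_eq!(map.get(&1, &NullCtx), Some(&"one"));
+/// ```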
+#[derive(Default)] +pub struct NullCtx; + +impl CtxEq for NullCtx { + fn ctx_eq(&self, a: &V, b: &V) -> bool { + a.eq(b) + } +} +impl CtxHash for NullCtx { + fn ctx_hash(&self, state: &mut H, value: &V) { + value.hash(state); + } +} + +/// A bucket in the hash table. +/// +/// Some performance-related design notes: we cache the hashcode for +/// speed, as this often buys a few percent speed in +/// interning-table-heavy workloads. We only keep the low 32 bits of +/// the hashcode, for memory efficiency: in common use, `K` and `V` +/// are often 32 bits also, and a 12-byte bucket is measurably better +/// than a 16-byte bucket. +struct BucketData { + hash: u32, + k: K, + v: V, +} + +/// A HashMap that takes external context for all operations. +pub struct CtxHashMap { + raw: HashTable>, +} + +impl CtxHashMap { + /// Create an empty hashmap with pre-allocated space for the given + /// capacity. + pub fn with_capacity(capacity: usize) -> Self { + Self { + raw: HashTable::with_capacity(capacity), + } + } +} + +fn compute_hash(ctx: &Ctx, k: &K) -> u32 +where + Ctx: CtxHash, +{ + let mut hasher = rustc_hash::FxHasher::default(); + ctx.ctx_hash(&mut hasher, k); + hasher.finish() as u32 +} + +impl CtxHashMap { + /// Insert a new key-value pair, returning the old value associated + /// with this key (if any). + pub fn insert(&mut self, k: K, v: V, ctx: &Ctx) -> Option + where + Ctx: CtxEq + CtxHash, + { + let hash = compute_hash(ctx, &k); + match self.raw.find_mut(hash as u64, |bucket| { + hash == bucket.hash && ctx.ctx_eq(&bucket.k, &k) + }) { + Some(bucket) => Some(std::mem::replace(&mut bucket.v, v)), + None => { + let data = BucketData { hash, k, v }; + self.raw + .insert_unique(hash as u64, data, |bucket| bucket.hash as u64); + None + } + } + } + + /// Look up a key, returning a borrow of the value if present. + pub fn get<'a, Q, Ctx>(&'a self, k: &Q, ctx: &Ctx) -> Option<&'a V> + where + Ctx: CtxEq + CtxHash + CtxHash, + { + let hash = compute_hash(ctx, k); + self.raw + .find(hash as u64, |bucket| { + hash == bucket.hash && ctx.ctx_eq(&bucket.k, k) + }) + .map(|bucket| &bucket.v) + } + + /// Look up a key, returning an `Entry` that refers to an existing + /// value or allows inserting a new one. + pub fn entry<'a, Ctx>(&'a mut self, k: K, ctx: &Ctx) -> Entry<'a, K, V> + where + Ctx: CtxEq + CtxHash, + { + let hash = compute_hash(ctx, &k); + let raw = self.raw.entry( + hash as u64, + |bucket| hash == bucket.hash && ctx.ctx_eq(&bucket.k, &k), + |bucket| compute_hash(ctx, &bucket.k) as u64, + ); + match raw { + hashbrown::hash_table::Entry::Occupied(o) => Entry::Occupied(OccupiedEntry { raw: o }), + hashbrown::hash_table::Entry::Vacant(v) => Entry::Vacant(VacantEntry { + hash, + key: k, + raw: v, + }), + } + } +} + +/// A reference to an existing or vacant entry in the hash table. +pub enum Entry<'a, K, V> { + Occupied(OccupiedEntry<'a, K, V>), + Vacant(VacantEntry<'a, K, V>), +} + +/// A reference to an occupied entry in the hash table. +pub struct OccupiedEntry<'a, K, V> { + raw: hashbrown::hash_table::OccupiedEntry<'a, BucketData>, +} + +/// A reference to a vacant entry in the hash table. +pub struct VacantEntry<'a, K, V> { + hash: u32, + key: K, + raw: hashbrown::hash_table::VacantEntry<'a, BucketData>, +} + +impl<'a, K, V> OccupiedEntry<'a, K, V> { + /// Get the existing value. + pub fn get(&self) -> &V { + &self.raw.get().v + } + + /// Get the existing value, mutably. 
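+    ///
+    /// Together with `Entry`, this supports an insert-or-update pattern
+    /// (sketch; `map`, `key`, and `ctx` are assumed to be in scope):
+    ///
+    /// ```ignore
+    /// match map.entry(key, &ctx) {
+    ///     Entry::Occupied(mut o) => *o.get_mut() += 1,
+    ///     Entry::Vacant(v) => v.insert(1),
+    /// }
+    /// ```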
+ pub fn get_mut(&mut self) -> &mut V { + &mut self.raw.get_mut().v + } +} + +impl<'a, K, V> VacantEntry<'a, K, V> { + /// Insert a new value. + pub fn insert(self, v: V) { + self.raw.insert(BucketData { + hash: self.hash, + k: self.key, + v, + }); + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[derive(Clone, Copy, Debug)] + struct Key { + index: u32, + } + struct Ctx { + vals: &'static [&'static str], + } + impl CtxEq for Ctx { + fn ctx_eq(&self, a: &Key, b: &Key) -> bool { + self.vals[a.index as usize].eq(self.vals[b.index as usize]) + } + } + impl CtxHash for Ctx { + fn ctx_hash(&self, state: &mut H, value: &Key) { + self.vals[value.index as usize].hash(state); + } + } + + #[test] + fn test_basic() { + let ctx = Ctx { + vals: &["a", "b", "a"], + }; + + let k0 = Key { index: 0 }; + let k1 = Key { index: 1 }; + let k2 = Key { index: 2 }; + + assert!(ctx.ctx_eq(&k0, &k2)); + assert!(!ctx.ctx_eq(&k0, &k1)); + assert!(!ctx.ctx_eq(&k2, &k1)); + + let mut map: CtxHashMap = CtxHashMap::with_capacity(4); + assert_eq!(map.insert(k0, 42, &ctx), None); + assert_eq!(map.insert(k2, 84, &ctx), Some(42)); + assert_eq!(map.get(&k1, &ctx), None); + assert_eq!(*map.get(&k0, &ctx).unwrap(), 84); + } +} diff --git a/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/cursor.rs b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/cursor.rs new file mode 100644 index 000000000..a0176e629 --- /dev/null +++ b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/cursor.rs @@ -0,0 +1,644 @@ +//! Cursor library. +//! +//! This module defines cursor data types that can be used for inserting instructions. + +use crate::ir; + +/// The possible positions of a cursor. +#[derive(Clone, Copy, PartialEq, Eq, Debug)] +pub enum CursorPosition { + /// Cursor is not pointing anywhere. No instructions can be inserted. + Nowhere, + /// Cursor is pointing at an existing instruction. + /// New instructions will be inserted *before* the current instruction. + At(ir::Inst), + /// Cursor is before the beginning of a block. No instructions can be inserted. Calling + /// `next_inst()` will move to the first instruction in the block. + Before(ir::Block), + /// Cursor is pointing after the end of a block. + /// New instructions will be appended to the block. + After(ir::Block), +} + +/// All cursor types implement the `Cursor` which provides common navigation operations. +pub trait Cursor { + /// Get the current cursor position. + fn position(&self) -> CursorPosition; + + /// Set the current position. + fn set_position(&mut self, pos: CursorPosition); + + /// Get the source location that should be assigned to new instructions. + fn srcloc(&self) -> ir::SourceLoc; + + /// Set the source location that should be assigned to new instructions. + fn set_srcloc(&mut self, srcloc: ir::SourceLoc); + + /// Borrow a reference to the function layout that this cursor is navigating. + fn layout(&self) -> &ir::Layout; + + /// Borrow a mutable reference to the function layout that this cursor is navigating. + fn layout_mut(&mut self) -> &mut ir::Layout; + + /// Exchange this cursor for one with a set source location. + /// + /// This is intended to be used as a builder method: + /// + /// ``` + /// # use cranelift_codegen::ir::{Function, Block, SourceLoc}; + /// # use cranelift_codegen::cursor::{Cursor, FuncCursor}; + /// fn edit_func(func: &mut Function, srcloc: SourceLoc) { + /// let mut pos = FuncCursor::new(func).with_srcloc(srcloc); + /// + /// // Use `pos`... 
+ /// } + /// ``` + fn with_srcloc(mut self, srcloc: ir::SourceLoc) -> Self + where + Self: Sized, + { + self.set_srcloc(srcloc); + self + } + + /// Rebuild this cursor positioned at `pos`. + fn at_position(mut self, pos: CursorPosition) -> Self + where + Self: Sized, + { + self.set_position(pos); + self + } + + /// Rebuild this cursor positioned at `inst`. + /// + /// This is intended to be used as a builder method: + /// + /// ``` + /// # use cranelift_codegen::ir::{Function, Block, Inst}; + /// # use cranelift_codegen::cursor::{Cursor, FuncCursor}; + /// fn edit_func(func: &mut Function, inst: Inst) { + /// let mut pos = FuncCursor::new(func).at_inst(inst); + /// + /// // Use `pos`... + /// } + /// ``` + fn at_inst(mut self, inst: ir::Inst) -> Self + where + Self: Sized, + { + self.goto_inst(inst); + self + } + + /// Rebuild this cursor positioned at the first insertion point for `block`. + /// This differs from `at_first_inst` in that it doesn't assume that any + /// instructions have been inserted into `block` yet. + /// + /// This is intended to be used as a builder method: + /// + /// ``` + /// # use cranelift_codegen::ir::{Function, Block, Inst}; + /// # use cranelift_codegen::cursor::{Cursor, FuncCursor}; + /// fn edit_func(func: &mut Function, block: Block) { + /// let mut pos = FuncCursor::new(func).at_first_insertion_point(block); + /// + /// // Use `pos`... + /// } + /// ``` + fn at_first_insertion_point(mut self, block: ir::Block) -> Self + where + Self: Sized, + { + self.goto_first_insertion_point(block); + self + } + + /// Rebuild this cursor positioned at the first instruction in `block`. + /// + /// This is intended to be used as a builder method: + /// + /// ``` + /// # use cranelift_codegen::ir::{Function, Block, Inst}; + /// # use cranelift_codegen::cursor::{Cursor, FuncCursor}; + /// fn edit_func(func: &mut Function, block: Block) { + /// let mut pos = FuncCursor::new(func).at_first_inst(block); + /// + /// // Use `pos`... + /// } + /// ``` + fn at_first_inst(mut self, block: ir::Block) -> Self + where + Self: Sized, + { + self.goto_first_inst(block); + self + } + + /// Rebuild this cursor positioned at the last instruction in `block`. + /// + /// This is intended to be used as a builder method: + /// + /// ``` + /// # use cranelift_codegen::ir::{Function, Block, Inst}; + /// # use cranelift_codegen::cursor::{Cursor, FuncCursor}; + /// fn edit_func(func: &mut Function, block: Block) { + /// let mut pos = FuncCursor::new(func).at_last_inst(block); + /// + /// // Use `pos`... + /// } + /// ``` + fn at_last_inst(mut self, block: ir::Block) -> Self + where + Self: Sized, + { + self.goto_last_inst(block); + self + } + + /// Rebuild this cursor positioned after `inst`. + /// + /// This is intended to be used as a builder method: + /// + /// ``` + /// # use cranelift_codegen::ir::{Function, Block, Inst}; + /// # use cranelift_codegen::cursor::{Cursor, FuncCursor}; + /// fn edit_func(func: &mut Function, inst: Inst) { + /// let mut pos = FuncCursor::new(func).after_inst(inst); + /// + /// // Use `pos`... + /// } + /// ``` + fn after_inst(mut self, inst: ir::Inst) -> Self + where + Self: Sized, + { + self.goto_after_inst(inst); + self + } + + /// Rebuild this cursor positioned at the top of `block`. 
+ /// + /// This is intended to be used as a builder method: + /// + /// ``` + /// # use cranelift_codegen::ir::{Function, Block, Inst}; + /// # use cranelift_codegen::cursor::{Cursor, FuncCursor}; + /// fn edit_func(func: &mut Function, block: Block) { + /// let mut pos = FuncCursor::new(func).at_top(block); + /// + /// // Use `pos`... + /// } + /// ``` + fn at_top(mut self, block: ir::Block) -> Self + where + Self: Sized, + { + self.goto_top(block); + self + } + + /// Rebuild this cursor positioned at the bottom of `block`. + /// + /// This is intended to be used as a builder method: + /// + /// ``` + /// # use cranelift_codegen::ir::{Function, Block, Inst}; + /// # use cranelift_codegen::cursor::{Cursor, FuncCursor}; + /// fn edit_func(func: &mut Function, block: Block) { + /// let mut pos = FuncCursor::new(func).at_bottom(block); + /// + /// // Use `pos`... + /// } + /// ``` + fn at_bottom(mut self, block: ir::Block) -> Self + where + Self: Sized, + { + self.goto_bottom(block); + self + } + + /// Get the block corresponding to the current position. + fn current_block(&self) -> Option { + use self::CursorPosition::*; + match self.position() { + Nowhere => None, + At(inst) => self.layout().inst_block(inst), + Before(block) | After(block) => Some(block), + } + } + + /// Get the instruction corresponding to the current position, if any. + fn current_inst(&self) -> Option { + use self::CursorPosition::*; + match self.position() { + At(inst) => Some(inst), + _ => None, + } + } + + /// Go to the position after a specific instruction, which must be inserted + /// in the layout. New instructions will be inserted after `inst`. + fn goto_after_inst(&mut self, inst: ir::Inst) { + debug_assert!(self.layout().inst_block(inst).is_some()); + let new_pos = if let Some(next) = self.layout().next_inst(inst) { + CursorPosition::At(next) + } else { + CursorPosition::After( + self.layout() + .inst_block(inst) + .expect("current instruction removed?"), + ) + }; + self.set_position(new_pos); + } + + /// Go to a specific instruction which must be inserted in the layout. + /// New instructions will be inserted before `inst`. + fn goto_inst(&mut self, inst: ir::Inst) { + debug_assert!(self.layout().inst_block(inst).is_some()); + self.set_position(CursorPosition::At(inst)); + } + + /// Go to the position for inserting instructions at the beginning of `block`, + /// which unlike `goto_first_inst` doesn't assume that any instructions have + /// been inserted into `block` yet. + fn goto_first_insertion_point(&mut self, block: ir::Block) { + if let Some(inst) = self.layout().first_inst(block) { + self.goto_inst(inst); + } else { + self.goto_bottom(block); + } + } + + /// Go to the first instruction in `block`. + fn goto_first_inst(&mut self, block: ir::Block) { + let inst = self.layout().first_inst(block).expect("Empty block"); + self.goto_inst(inst); + } + + /// Go to the last instruction in `block`. + fn goto_last_inst(&mut self, block: ir::Block) { + let inst = self.layout().last_inst(block).expect("Empty block"); + self.goto_inst(inst); + } + + /// Go to the top of `block` which must be inserted into the layout. + /// At this position, instructions cannot be inserted, but `next_inst()` will move to the first + /// instruction in `block`. + fn goto_top(&mut self, block: ir::Block) { + debug_assert!(self.layout().is_block_inserted(block)); + self.set_position(CursorPosition::Before(block)); + } + + /// Go to the bottom of `block` which must be inserted into the layout. 
+ /// At this position, inserted instructions will be appended to `block`. + fn goto_bottom(&mut self, block: ir::Block) { + debug_assert!(self.layout().is_block_inserted(block)); + self.set_position(CursorPosition::After(block)); + } + + /// Go to the top of the next block in layout order and return it. + /// + /// - If the cursor wasn't pointing at anything, go to the top of the first block in the + /// function. + /// - If there are no more blocks, leave the cursor pointing at nothing and return `None`. + /// + /// # Examples + /// + /// The `next_block()` method is intended for iterating over the blocks in layout order: + /// + /// ``` + /// # use cranelift_codegen::ir::{Function, Block}; + /// # use cranelift_codegen::cursor::{Cursor, FuncCursor}; + /// fn edit_func(func: &mut Function) { + /// let mut cursor = FuncCursor::new(func); + /// while let Some(block) = cursor.next_block() { + /// // Edit block. + /// } + /// } + /// ``` + fn next_block(&mut self) -> Option { + let next = if let Some(block) = self.current_block() { + self.layout().next_block(block) + } else { + self.layout().entry_block() + }; + self.set_position(match next { + Some(block) => CursorPosition::Before(block), + None => CursorPosition::Nowhere, + }); + next + } + + /// Go to the bottom of the previous block in layout order and return it. + /// + /// - If the cursor wasn't pointing at anything, go to the bottom of the last block in the + /// function. + /// - If there are no more blocks, leave the cursor pointing at nothing and return `None`. + /// + /// # Examples + /// + /// The `prev_block()` method is intended for iterating over the blocks in backwards layout order: + /// + /// ``` + /// # use cranelift_codegen::ir::{Function, Block}; + /// # use cranelift_codegen::cursor::{Cursor, FuncCursor}; + /// fn edit_func(func: &mut Function) { + /// let mut cursor = FuncCursor::new(func); + /// while let Some(block) = cursor.prev_block() { + /// // Edit block. + /// } + /// } + /// ``` + fn prev_block(&mut self) -> Option { + let prev = if let Some(block) = self.current_block() { + self.layout().prev_block(block) + } else { + self.layout().last_block() + }; + self.set_position(match prev { + Some(block) => CursorPosition::After(block), + None => CursorPosition::Nowhere, + }); + prev + } + + /// Move to the next instruction in the same block and return it. + /// + /// - If the cursor was positioned before a block, go to the first instruction in that block. + /// - If there are no more instructions in the block, go to the `After(block)` position and return + /// `None`. + /// - If the cursor wasn't pointing anywhere, keep doing that. + /// + /// This method will never move the cursor to a different block. + /// + /// # Examples + /// + /// The `next_inst()` method is intended for iterating over the instructions in a block like + /// this: + /// + /// ``` + /// # use cranelift_codegen::ir::{Function, Block}; + /// # use cranelift_codegen::cursor::{Cursor, FuncCursor}; + /// fn edit_block(func: &mut Function, block: Block) { + /// let mut cursor = FuncCursor::new(func).at_top(block); + /// while let Some(inst) = cursor.next_inst() { + /// // Edit instructions... + /// } + /// } + /// ``` + /// The loop body can insert and remove instructions via the cursor. 
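+    ///
+    /// As a further sketch, a loop that removes every instruction in a block can be
+    /// written with only the methods on this trait:
+    ///
+    /// ```
+    /// # use cranelift_codegen::ir::{Function, Block};
+    /// # use cranelift_codegen::cursor::{Cursor, FuncCursor};
+    /// fn clear_block(func: &mut Function, block: Block) {
+    ///     let mut cursor = FuncCursor::new(func).at_top(block);
+    ///     while let Some(_inst) = cursor.next_inst() {
+    ///         cursor.remove_inst_and_step_back();
+    ///     }
+    /// }
+    /// ```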
+ /// + /// Iterating over all the instructions in a function looks like this: + /// + /// ``` + /// # use cranelift_codegen::ir::{Function, Block}; + /// # use cranelift_codegen::cursor::{Cursor, FuncCursor}; + /// fn edit_func(func: &mut Function) { + /// let mut cursor = FuncCursor::new(func); + /// while let Some(block) = cursor.next_block() { + /// while let Some(inst) = cursor.next_inst() { + /// // Edit instructions... + /// } + /// } + /// } + /// ``` + fn next_inst(&mut self) -> Option { + use self::CursorPosition::*; + match self.position() { + Nowhere | After(..) => None, + At(inst) => { + if let Some(next) = self.layout().next_inst(inst) { + self.set_position(At(next)); + Some(next) + } else { + let pos = After( + self.layout() + .inst_block(inst) + .expect("current instruction removed?"), + ); + self.set_position(pos); + None + } + } + Before(block) => { + if let Some(next) = self.layout().first_inst(block) { + self.set_position(At(next)); + Some(next) + } else { + self.set_position(After(block)); + None + } + } + } + } + + /// Move to the previous instruction in the same block and return it. + /// + /// - If the cursor was positioned after a block, go to the last instruction in that block. + /// - If there are no more instructions in the block, go to the `Before(block)` position and return + /// `None`. + /// - If the cursor wasn't pointing anywhere, keep doing that. + /// + /// This method will never move the cursor to a different block. + /// + /// # Examples + /// + /// The `prev_inst()` method is intended for iterating backwards over the instructions in an + /// block like this: + /// + /// ``` + /// # use cranelift_codegen::ir::{Function, Block}; + /// # use cranelift_codegen::cursor::{Cursor, FuncCursor}; + /// fn edit_block(func: &mut Function, block: Block) { + /// let mut cursor = FuncCursor::new(func).at_bottom(block); + /// while let Some(inst) = cursor.prev_inst() { + /// // Edit instructions... + /// } + /// } + /// ``` + fn prev_inst(&mut self) -> Option { + use self::CursorPosition::*; + match self.position() { + Nowhere | Before(..) => None, + At(inst) => { + if let Some(prev) = self.layout().prev_inst(inst) { + self.set_position(At(prev)); + Some(prev) + } else { + let pos = Before( + self.layout() + .inst_block(inst) + .expect("current instruction removed?"), + ); + self.set_position(pos); + None + } + } + After(block) => { + if let Some(prev) = self.layout().last_inst(block) { + self.set_position(At(prev)); + Some(prev) + } else { + self.set_position(Before(block)); + None + } + } + } + } + + /// Insert an instruction at the current position. + /// + /// - If pointing at an instruction, the new instruction is inserted before the current + /// instruction. + /// - If pointing at the bottom of a block, the new instruction is appended to the block. + /// - Otherwise panic. + /// + /// In either case, the cursor is not moved, such that repeated calls to `insert_inst()` causes + /// instructions to appear in insertion order in the block. + fn insert_inst(&mut self, inst: ir::Inst) { + use self::CursorPosition::*; + match self.position() { + Nowhere | Before(..) => panic!("Invalid insert_inst position"), + At(cur) => self.layout_mut().insert_inst(inst, cur), + After(block) => self.layout_mut().append_inst(inst, block), + } + } + + /// Remove the instruction under the cursor. + /// + /// The cursor is left pointing at the position following the current instruction. + /// + /// Return the instruction that was removed. 
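+    ///
+    /// Note that because the cursor is advanced, removing instructions with this
+    /// method from inside a `next_inst()` loop will skip the instruction that
+    /// follows the removed one; `remove_inst_and_step_back()` avoids that.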
+ fn remove_inst(&mut self) -> ir::Inst { + let inst = self.current_inst().expect("No instruction to remove"); + self.next_inst(); + self.layout_mut().remove_inst(inst); + inst + } + + /// Remove the instruction under the cursor. + /// + /// The cursor is left pointing at the position preceding the current instruction. + /// + /// Return the instruction that was removed. + fn remove_inst_and_step_back(&mut self) -> ir::Inst { + let inst = self.current_inst().expect("No instruction to remove"); + self.prev_inst(); + self.layout_mut().remove_inst(inst); + inst + } + + /// Insert a block at the current position and switch to it. + /// + /// As far as possible, this method behaves as if the block header were an instruction inserted + /// at the current position. + /// + /// - If the cursor is pointing at an existing instruction, *the current block is split in two* + /// and the current instruction becomes the first instruction in the inserted block. + /// - If the cursor points at the bottom of a block, the new block is inserted after the current + /// one, and moved to the bottom of the new block where instructions can be appended. + /// - If the cursor points to the top of a block, the new block is inserted above the current one. + /// - If the cursor is not pointing at anything, the new block is placed last in the layout. + /// + /// This means that it is always valid to call this method, and it always leaves the cursor in + /// a state that will insert instructions into the new block. + fn insert_block(&mut self, new_block: ir::Block) { + use self::CursorPosition::*; + match self.position() { + At(inst) => { + self.layout_mut().split_block(new_block, inst); + // All other cases move to `After(block)`, but in this case we'll stay `At(inst)`. + return; + } + Nowhere => self.layout_mut().append_block(new_block), + Before(block) => self.layout_mut().insert_block(new_block, block), + After(block) => self.layout_mut().insert_block_after(new_block, block), + } + // For everything but `At(inst)` we end up appending to the new block. + self.set_position(After(new_block)); + } +} + +/// Function cursor. +/// +/// A `FuncCursor` holds a mutable reference to a whole `ir::Function` while keeping a position +/// too. The function can be re-borrowed by accessing the public `cur.func` member. +/// +/// This cursor is for use before legalization. The inserted instructions are not given an +/// encoding. +pub struct FuncCursor<'f> { + pos: CursorPosition, + srcloc: ir::SourceLoc, + + /// The referenced function. + pub func: &'f mut ir::Function, +} + +impl<'f> FuncCursor<'f> { + /// Create a new `FuncCursor` pointing nowhere. + pub fn new(func: &'f mut ir::Function) -> Self { + Self { + pos: CursorPosition::Nowhere, + srcloc: Default::default(), + func, + } + } + + /// Use the source location of `inst` for future instructions. + pub fn use_srcloc(&mut self, inst: ir::Inst) { + self.srcloc = self.func.srcloc(inst); + } + + /// Create an instruction builder that inserts an instruction at the current position. 
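+    ///
+    /// A small illustrative sketch (the helper function here is hypothetical) that
+    /// fills a fresh block through the builder:
+    ///
+    /// ```
+    /// # use cranelift_codegen::ir::{Function, InstBuilder};
+    /// # use cranelift_codegen::ir::types::I32;
+    /// # use cranelift_codegen::cursor::{Cursor, FuncCursor};
+    /// fn add_trivial_return(func: &mut Function) {
+    ///     let block = func.dfg.make_block();
+    ///     let mut pos = FuncCursor::new(func);
+    ///     pos.insert_block(block);
+    ///     let zero = pos.ins().iconst(I32, 0);
+    ///     pos.ins().return_(&[zero]);
+    /// }
+    /// ```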
+ pub fn ins(&mut self) -> ir::InsertBuilder<'_, &mut FuncCursor<'f>> { + ir::InsertBuilder::new(self) + } +} + +impl<'f> Cursor for FuncCursor<'f> { + fn position(&self) -> CursorPosition { + self.pos + } + + fn set_position(&mut self, pos: CursorPosition) { + self.pos = pos + } + + fn srcloc(&self) -> ir::SourceLoc { + self.srcloc + } + + fn set_srcloc(&mut self, srcloc: ir::SourceLoc) { + self.func.params.ensure_base_srcloc(srcloc); + self.srcloc = srcloc; + } + + fn layout(&self) -> &ir::Layout { + &self.func.layout + } + + fn layout_mut(&mut self) -> &mut ir::Layout { + &mut self.func.layout + } +} + +impl<'c, 'f> ir::InstInserterBase<'c> for &'c mut FuncCursor<'f> { + fn data_flow_graph(&self) -> &ir::DataFlowGraph { + &self.func.dfg + } + + fn data_flow_graph_mut(&mut self) -> &mut ir::DataFlowGraph { + &mut self.func.dfg + } + + fn insert_built_inst(self, inst: ir::Inst) -> &'c mut ir::DataFlowGraph { + self.insert_inst(inst); + if !self.srcloc.is_default() { + self.func.set_srcloc(inst, self.srcloc); + } + &mut self.func.dfg + } +} diff --git a/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/data_value.rs b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/data_value.rs new file mode 100644 index 000000000..523182f07 --- /dev/null +++ b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/data_value.rs @@ -0,0 +1,402 @@ +//! This module gives users to instantiate values that Cranelift understands. These values are used, +//! for example, during interpretation and for wrapping immediates. +use crate::ir::immediates::{Ieee128, Ieee16, Ieee32, Ieee64, Offset32}; +use crate::ir::{types, ConstantData, Type}; +use core::cmp::Ordering; +use core::fmt::{self, Display, Formatter}; + +/// Represent a data value. Where [Value] is an SSA reference, [DataValue] is the type + value +/// that would be referred to by a [Value]. +/// +/// [Value]: crate::ir::Value +#[allow(missing_docs)] +#[derive(Clone, Debug, PartialOrd)] +pub enum DataValue { + I8(i8), + I16(i16), + I32(i32), + I64(i64), + I128(i128), + F16(Ieee16), + F32(Ieee32), + F64(Ieee64), + F128(Ieee128), + V128([u8; 16]), + V64([u8; 8]), +} + +impl PartialEq for DataValue { + fn eq(&self, other: &Self) -> bool { + use DataValue::*; + match (self, other) { + (I8(l), I8(r)) => l == r, + (I8(_), _) => false, + (I16(l), I16(r)) => l == r, + (I16(_), _) => false, + (I32(l), I32(r)) => l == r, + (I32(_), _) => false, + (I64(l), I64(r)) => l == r, + (I64(_), _) => false, + (I128(l), I128(r)) => l == r, + (I128(_), _) => false, + (F16(l), F16(r)) => l.partial_cmp(&r) == Some(Ordering::Equal), + (F16(_), _) => false, + (F32(l), F32(r)) => l.as_f32() == r.as_f32(), + (F32(_), _) => false, + (F64(l), F64(r)) => l.as_f64() == r.as_f64(), + (F64(_), _) => false, + (F128(l), F128(r)) => l.partial_cmp(&r) == Some(Ordering::Equal), + (F128(_), _) => false, + (V128(l), V128(r)) => l == r, + (V128(_), _) => false, + (V64(l), V64(r)) => l == r, + (V64(_), _) => false, + } + } +} + +impl DataValue { + /// Try to cast an immediate integer (a wrapped `i64` on most Cranelift instructions) to the + /// given Cranelift [Type]. 
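+    ///
+    /// For example, `DataValue::from_integer(7, types::I16)` yields `Ok(DataValue::I16(7))`,
+    /// while a non-integer type such as `types::F32` yields
+    /// `Err(DataValueCastFailure::FromInteger(7, types::F32))`.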
+ pub fn from_integer(imm: i128, ty: Type) -> Result { + match ty { + types::I8 => Ok(DataValue::I8(imm as i8)), + types::I16 => Ok(DataValue::I16(imm as i16)), + types::I32 => Ok(DataValue::I32(imm as i32)), + types::I64 => Ok(DataValue::I64(imm as i64)), + types::I128 => Ok(DataValue::I128(imm)), + _ => Err(DataValueCastFailure::FromInteger(imm, ty)), + } + } + + /// Return the Cranelift IR [Type] for this [DataValue]. + pub fn ty(&self) -> Type { + match self { + DataValue::I8(_) => types::I8, + DataValue::I16(_) => types::I16, + DataValue::I32(_) => types::I32, + DataValue::I64(_) => types::I64, + DataValue::I128(_) => types::I128, + DataValue::F16(_) => types::F16, + DataValue::F32(_) => types::F32, + DataValue::F64(_) => types::F64, + DataValue::F128(_) => types::F128, + DataValue::V128(_) => types::I8X16, // A default type. + DataValue::V64(_) => types::I8X8, // A default type. + } + } + + /// Return true if the value is a vector (i.e. `DataValue::V128`). + pub fn is_vector(&self) -> bool { + match self { + DataValue::V128(_) | DataValue::V64(_) => true, + _ => false, + } + } + + fn swap_bytes(self) -> Self { + match self { + DataValue::I8(i) => DataValue::I8(i.swap_bytes()), + DataValue::I16(i) => DataValue::I16(i.swap_bytes()), + DataValue::I32(i) => DataValue::I32(i.swap_bytes()), + DataValue::I64(i) => DataValue::I64(i.swap_bytes()), + DataValue::I128(i) => DataValue::I128(i.swap_bytes()), + DataValue::F16(f) => DataValue::F16(Ieee16::with_bits(f.bits().swap_bytes())), + DataValue::F32(f) => DataValue::F32(Ieee32::with_bits(f.bits().swap_bytes())), + DataValue::F64(f) => DataValue::F64(Ieee64::with_bits(f.bits().swap_bytes())), + DataValue::F128(f) => DataValue::F128(Ieee128::with_bits(f.bits().swap_bytes())), + DataValue::V128(mut v) => { + v.reverse(); + DataValue::V128(v) + } + DataValue::V64(mut v) => { + v.reverse(); + DataValue::V64(v) + } + } + } + + /// Converts `self` to big endian from target's endianness. + pub fn to_be(self) -> Self { + if cfg!(target_endian = "big") { + self + } else { + self.swap_bytes() + } + } + + /// Converts `self` to little endian from target's endianness. + pub fn to_le(self) -> Self { + if cfg!(target_endian = "little") { + self + } else { + self.swap_bytes() + } + } + + /// Write a [DataValue] to a slice in native-endian byte order. + /// + /// # Panics: + /// + /// Panics if the slice does not have enough space to accommodate the [DataValue] + pub fn write_to_slice_ne(&self, dst: &mut [u8]) { + match self { + DataValue::I8(i) => dst[..1].copy_from_slice(&i.to_ne_bytes()[..]), + DataValue::I16(i) => dst[..2].copy_from_slice(&i.to_ne_bytes()[..]), + DataValue::I32(i) => dst[..4].copy_from_slice(&i.to_ne_bytes()[..]), + DataValue::I64(i) => dst[..8].copy_from_slice(&i.to_ne_bytes()[..]), + DataValue::I128(i) => dst[..16].copy_from_slice(&i.to_ne_bytes()[..]), + DataValue::F16(f) => dst[..2].copy_from_slice(&f.bits().to_ne_bytes()[..]), + DataValue::F32(f) => dst[..4].copy_from_slice(&f.bits().to_ne_bytes()[..]), + DataValue::F64(f) => dst[..8].copy_from_slice(&f.bits().to_ne_bytes()[..]), + DataValue::F128(f) => dst[..16].copy_from_slice(&f.bits().to_ne_bytes()[..]), + DataValue::V128(v) => dst[..16].copy_from_slice(&v[..]), + DataValue::V64(v) => dst[..8].copy_from_slice(&v[..]), + }; + } + + /// Write a [DataValue] to a slice in big-endian byte order. 
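+    ///
+    /// For example, `DataValue::I16(0x0102)` written with this method stores the bytes
+    /// `[0x01, 0x02]` at the start of `dst`, regardless of the host's endianness.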
+ /// + /// # Panics: + /// + /// Panics if the slice does not have enough space to accommodate the [DataValue] + pub fn write_to_slice_be(&self, dst: &mut [u8]) { + self.clone().to_be().write_to_slice_ne(dst); + } + + /// Write a [DataValue] to a slice in little-endian byte order. + /// + /// # Panics: + /// + /// Panics if the slice does not have enough space to accommodate the [DataValue] + pub fn write_to_slice_le(&self, dst: &mut [u8]) { + self.clone().to_le().write_to_slice_ne(dst); + } + + /// Read a [DataValue] from a slice using a given [Type] with native-endian byte order. + /// + /// # Panics: + /// + /// Panics if the slice does not have enough space to accommodate the [DataValue] + pub fn read_from_slice_ne(src: &[u8], ty: Type) -> Self { + match ty { + types::I8 => DataValue::I8(i8::from_ne_bytes(src[..1].try_into().unwrap())), + types::I16 => DataValue::I16(i16::from_ne_bytes(src[..2].try_into().unwrap())), + types::I32 => DataValue::I32(i32::from_ne_bytes(src[..4].try_into().unwrap())), + types::I64 => DataValue::I64(i64::from_ne_bytes(src[..8].try_into().unwrap())), + types::I128 => DataValue::I128(i128::from_ne_bytes(src[..16].try_into().unwrap())), + types::F16 => DataValue::F16(Ieee16::with_bits(u16::from_ne_bytes( + src[..2].try_into().unwrap(), + ))), + types::F32 => DataValue::F32(Ieee32::with_bits(u32::from_ne_bytes( + src[..4].try_into().unwrap(), + ))), + types::F64 => DataValue::F64(Ieee64::with_bits(u64::from_ne_bytes( + src[..8].try_into().unwrap(), + ))), + types::F128 => DataValue::F128(Ieee128::with_bits(u128::from_ne_bytes( + src[..16].try_into().unwrap(), + ))), + _ if ty.is_vector() => { + if ty.bytes() == 16 { + DataValue::V128(src[..16].try_into().unwrap()) + } else if ty.bytes() == 8 { + DataValue::V64(src[..8].try_into().unwrap()) + } else { + unimplemented!() + } + } + _ => unimplemented!(), + } + } + + /// Read a [DataValue] from a slice using a given [Type] in big-endian byte order. + /// + /// # Panics: + /// + /// Panics if the slice does not have enough space to accommodate the [DataValue] + pub fn read_from_slice_be(src: &[u8], ty: Type) -> Self { + DataValue::read_from_slice_ne(src, ty).to_be() + } + + /// Read a [DataValue] from a slice using a given [Type] in little-endian byte order. + /// + /// # Panics: + /// + /// Panics if the slice does not have enough space to accommodate the [DataValue] + pub fn read_from_slice_le(src: &[u8], ty: Type) -> Self { + DataValue::read_from_slice_ne(src, ty).to_le() + } + + /// Write a [DataValue] to a memory location in native-endian byte order. + pub unsafe fn write_value_to(&self, p: *mut u128) { + let size = self.ty().bytes() as usize; + self.write_to_slice_ne(std::slice::from_raw_parts_mut(p as *mut u8, size)); + } + + /// Read a [DataValue] from a memory location using a given [Type] in native-endian byte order. + pub unsafe fn read_value_from(p: *const u128, ty: Type) -> Self { + DataValue::read_from_slice_ne( + std::slice::from_raw_parts(p as *const u8, ty.bytes() as usize), + ty, + ) + } + + /// Performs a bitwise comparison over the contents of [DataValue]. + /// + /// Returns true if all bits are equal. + /// + /// This behaviour is different from PartialEq for NaN floats. + pub fn bitwise_eq(&self, other: &DataValue) -> bool { + match (self, other) { + // We need to bit compare the floats to ensure that we produce the correct values + // on NaN's. The test suite expects to assert the precise bit pattern on NaN's or + // works around it in the tests themselves. 
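+            // For example, two `F32` values holding the same NaN bit pattern are
+            // `bitwise_eq` even though float `==` (and hence `PartialEq`) reports them
+            // as unequal, while `+0.0` and `-0.0` compare `==` but not `bitwise_eq`.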
+ (DataValue::F16(a), DataValue::F16(b)) => a.bits() == b.bits(), + (DataValue::F32(a), DataValue::F32(b)) => a.bits() == b.bits(), + (DataValue::F64(a), DataValue::F64(b)) => a.bits() == b.bits(), + (DataValue::F128(a), DataValue::F128(b)) => a.bits() == b.bits(), + + // We don't need to worry about F32x4 / F64x2 Since we compare V128 which is already the + // raw bytes anyway + (a, b) => a == b, + } + } +} + +/// Record failures to cast [DataValue]. +#[derive(Debug, PartialEq)] +#[allow(missing_docs)] +pub enum DataValueCastFailure { + TryInto(Type, Type), + FromInteger(i128, Type), +} + +// This is manually implementing Error and Display instead of using thiserror to reduce the amount +// of dependencies used by Cranelift. +impl std::error::Error for DataValueCastFailure {} + +impl Display for DataValueCastFailure { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + match self { + DataValueCastFailure::TryInto(from, to) => { + write!(f, "unable to cast data value of type {from} to type {to}") + } + DataValueCastFailure::FromInteger(val, to) => { + write!(f, "unable to cast i64({val}) to a data value of type {to}") + } + } + } +} + +/// Helper for creating conversion implementations for [DataValue]. +macro_rules! build_conversion_impl { + ( $rust_ty:ty, $data_value_ty:ident, $cranelift_ty:ident ) => { + impl From<$rust_ty> for DataValue { + fn from(data: $rust_ty) -> Self { + DataValue::$data_value_ty(data) + } + } + + impl TryInto<$rust_ty> for DataValue { + type Error = DataValueCastFailure; + fn try_into(self) -> Result<$rust_ty, Self::Error> { + if let DataValue::$data_value_ty(v) = self { + Ok(v) + } else { + Err(DataValueCastFailure::TryInto( + self.ty(), + types::$cranelift_ty, + )) + } + } + } + }; +} +build_conversion_impl!(i8, I8, I8); +build_conversion_impl!(i16, I16, I16); +build_conversion_impl!(i32, I32, I32); +build_conversion_impl!(i64, I64, I64); +build_conversion_impl!(i128, I128, I128); +build_conversion_impl!(Ieee16, F16, F16); +build_conversion_impl!(Ieee32, F32, F32); +build_conversion_impl!(Ieee64, F64, F64); +build_conversion_impl!(Ieee128, F128, F128); +build_conversion_impl!([u8; 16], V128, I8X16); +build_conversion_impl!([u8; 8], V64, I8X8); +impl From for DataValue { + fn from(o: Offset32) -> Self { + DataValue::from(Into::::into(o)) + } +} + +impl Display for DataValue { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + match self { + DataValue::I8(dv) => write!(f, "{dv}"), + DataValue::I16(dv) => write!(f, "{dv}"), + DataValue::I32(dv) => write!(f, "{dv}"), + DataValue::I64(dv) => write!(f, "{dv}"), + DataValue::I128(dv) => write!(f, "{dv}"), + // The Ieee* wrappers here print the expected syntax. + DataValue::F16(dv) => write!(f, "{dv}"), + DataValue::F32(dv) => write!(f, "{dv}"), + DataValue::F64(dv) => write!(f, "{dv}"), + DataValue::F128(dv) => write!(f, "{dv}"), + // Again, for syntax consistency, use ConstantData, which in this case displays as hex. + DataValue::V128(dv) => write!(f, "{}", ConstantData::from(&dv[..])), + DataValue::V64(dv) => write!(f, "{}", ConstantData::from(&dv[..])), + } + } +} + +/// Helper structure for printing bracket-enclosed vectors of [DataValue]s. +/// - for empty vectors, display `[]` +/// - for single item vectors, display `42`, e.g. +/// - for multiple item vectors, display `[42, 43, 44]`, e.g. 
+pub struct DisplayDataValues<'a>(pub &'a [DataValue]); + +impl<'a> Display for DisplayDataValues<'a> { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + if self.0.len() == 1 { + write!(f, "{}", self.0[0]) + } else { + write!(f, "[")?; + write_data_value_list(f, &self.0)?; + write!(f, "]") + } + } +} + +/// Helper function for displaying `Vec`. +pub fn write_data_value_list(f: &mut Formatter<'_>, list: &[DataValue]) -> fmt::Result { + match list.len() { + 0 => Ok(()), + 1 => write!(f, "{}", list[0]), + _ => { + write!(f, "{}", list[0])?; + for dv in list.iter().skip(1) { + write!(f, ", {dv}")?; + } + Ok(()) + } + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn type_conversions() { + assert_eq!(DataValue::V128([0; 16]).ty(), types::I8X16); + assert_eq!( + TryInto::<[u8; 16]>::try_into(DataValue::V128([0; 16])).unwrap(), + [0; 16] + ); + assert_eq!( + TryInto::::try_into(DataValue::V128([0; 16])).unwrap_err(), + DataValueCastFailure::TryInto(types::I8X16, types::I32) + ); + } +} diff --git a/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/dbg.rs b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/dbg.rs new file mode 100644 index 000000000..4796f3967 --- /dev/null +++ b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/dbg.rs @@ -0,0 +1,28 @@ +//! Debug tracing helpers. +use core::fmt; + +/// Prefix added to the log file names, just before the thread name or id. +pub static LOG_FILENAME_PREFIX: &str = "cranelift.dbg."; + +/// Helper for printing lists. +pub struct DisplayList<'a, T>(pub &'a [T]) +where + T: 'a + fmt::Display; + +impl<'a, T> fmt::Display for DisplayList<'a, T> +where + T: 'a + fmt::Display, +{ + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self.0.split_first() { + None => write!(f, "[]"), + Some((first, rest)) => { + write!(f, "[{first}")?; + for x in rest { + write!(f, ", {x}")?; + } + write!(f, "]") + } + } + } +} diff --git a/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/dominator_tree.rs b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/dominator_tree.rs new file mode 100644 index 000000000..92f7c143a --- /dev/null +++ b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/dominator_tree.rs @@ -0,0 +1,851 @@ +//! A Dominator Tree represented as mappings of Blocks to their immediate dominator. + +use crate::entity::SecondaryMap; +use crate::flowgraph::{BlockPredecessor, ControlFlowGraph}; +use crate::ir::{Block, Function, Layout, ProgramPoint}; +use crate::packed_option::PackedOption; +use crate::timing; +use alloc::vec::Vec; +use core::cmp; +use core::cmp::Ordering; +use core::mem; + +mod simple; + +pub use simple::SimpleDominatorTree; + +/// Spanning tree node, used during domtree computation. +#[derive(Clone, Default)] +struct SpanningTreeNode { + /// This node's block in function CFG. + block: PackedOption, + /// Node's ancestor in the spanning tree. + /// Gets invalidated during semi-dominator computation. + ancestor: u32, + /// The smallest semi value discovered on any semi-dominator path + /// that went through the node up till the moment. + /// Gets updated in the course of semi-dominator computation. + label: u32, + /// Semidominator value for the node. + semi: u32, + /// Immediate dominator value for the node. + /// Initialized to node's ancestor in the spanning tree. + idom: u32, +} + +/// DFS preorder number for unvisited nodes and the virtual root in the spanning tree. +const NOT_VISITED: u32 = 0; + +/// Spanning tree, in CFG preorder. 
+/// Node 0 is the virtual root and doesn't have a corresponding block. +/// It's not required because function's CFG in Cranelift always have +/// a singular root, but helps to avoid additional checks. +/// Numbering nodes from 0 also follows the convention in +/// `SimpleDominatorTree` and `DominatorTreePreorder`. +#[derive(Clone, Default)] +struct SpanningTree { + nodes: Vec, +} + +impl SpanningTree { + fn new() -> Self { + // Include the virtual root. + Self { + nodes: vec![Default::default()], + } + } + + fn with_capacity(capacity: usize) -> Self { + // Include the virtual root. + let mut nodes = Vec::with_capacity(capacity + 1); + nodes.push(Default::default()); + Self { nodes } + } + + fn len(&self) -> usize { + self.nodes.len() + } + + fn reserve(&mut self, capacity: usize) { + // Virtual root should be already included. + self.nodes.reserve(capacity); + } + + fn clear(&mut self) { + self.nodes.resize(1, Default::default()); + } + + /// Returns pre_number for the new node. + fn push(&mut self, ancestor: u32, block: Block) -> u32 { + // Virtual root should be already included. + debug_assert!(!self.nodes.is_empty()); + + let pre_number = self.nodes.len() as u32; + + self.nodes.push(SpanningTreeNode { + block: block.into(), + ancestor: ancestor, + label: pre_number, + semi: pre_number, + idom: ancestor, + }); + + pre_number + } +} + +impl std::ops::Index for SpanningTree { + type Output = SpanningTreeNode; + + fn index(&self, idx: u32) -> &Self::Output { + &self.nodes[idx as usize] + } +} + +impl std::ops::IndexMut for SpanningTree { + fn index_mut(&mut self, idx: u32) -> &mut Self::Output { + &mut self.nodes[idx as usize] + } +} + +/// Traversal event to compute both preorder spanning tree +/// and postorder block list. Can't use `Dfs` from traversals.rs +/// here because of the need for parent links. +enum TraversalEvent { + Enter(u32, Block), + Exit(Block), +} + +/// Dominator tree node. We keep one of these per block. +#[derive(Clone, Default)] +struct DominatorTreeNode { + /// Immediate dominator for the block, `None` for unreachable blocks. + idom: PackedOption, + /// Preorder traversal number, zero for unreachable blocks. + pre_number: u32, +} + +/// The dominator tree for a single function, +/// computed using Semi-NCA algorithm. +pub struct DominatorTree { + /// DFS spanning tree. + stree: SpanningTree, + /// List of CFG blocks in postorder. + postorder: Vec, + /// Dominator tree nodes. + nodes: SecondaryMap, + + /// Stack for building the spanning tree. + dfs_worklist: Vec, + /// Stack used for processing semidominator paths + /// in link-eval procedure. + eval_worklist: Vec, + + valid: bool, +} + +/// Methods for querying the dominator tree. +impl DominatorTree { + /// Is `block` reachable from the entry block? + pub fn is_reachable(&self, block: Block) -> bool { + self.nodes[block].pre_number != NOT_VISITED + } + + /// Get the CFG post-order of blocks that was used to compute the dominator tree. + /// + /// Note that this post-order is not updated automatically when the CFG is modified. It is + /// computed from scratch and cached by `compute()`. + pub fn cfg_postorder(&self) -> &[Block] { + debug_assert!(self.is_valid()); + &self.postorder + } + + /// Get an iterator over CFG reverse post-order of blocks used to compute the dominator tree. + /// + /// Note that the post-order is not updated automatically when the CFG is modified. It is + /// computed from scratch and cached by `compute()`. 
+ pub fn cfg_rpo(&self) -> impl Iterator { + debug_assert!(self.is_valid()); + self.postorder.iter().rev() + } + + /// Returns the immediate dominator of `block`. + /// + /// `block_a` is said to *dominate* `block_b` if all control flow paths from the function + /// entry to `block_b` must go through `block_a`. + /// + /// The *immediate dominator* is the dominator that is closest to `block`. All other dominators + /// also dominate the immediate dominator. + /// + /// This returns `None` if `block` is not reachable from the entry block, or if it is the entry block + /// which has no dominators. + pub fn idom(&self, block: Block) -> Option { + self.nodes[block].idom.into() + } + + /// Returns `true` if `a` dominates `b`. + /// + /// This means that every control-flow path from the function entry to `b` must go through `a`. + /// + /// Dominance is ill defined for unreachable blocks. This function can always determine + /// dominance for instructions in the same block, but otherwise returns `false` if either block + /// is unreachable. + /// + /// An instruction is considered to dominate itself. + /// A block is also considered to dominate itself. + pub fn dominates(&self, a: A, b: B, layout: &Layout) -> bool + where + A: Into, + B: Into, + { + let a = a.into(); + let b = b.into(); + match a { + ProgramPoint::Block(block_a) => match b { + ProgramPoint::Block(block_b) => self.block_dominates(block_a, block_b), + ProgramPoint::Inst(inst_b) => { + let block_b = layout + .inst_block(inst_b) + .expect("Instruction not in layout."); + self.block_dominates(block_a, block_b) + } + }, + ProgramPoint::Inst(inst_a) => { + let block_a: Block = layout + .inst_block(inst_a) + .expect("Instruction not in layout."); + match b { + ProgramPoint::Block(block_b) => { + block_a != block_b && self.block_dominates(block_a, block_b) + } + ProgramPoint::Inst(inst_b) => { + let block_b = layout + .inst_block(inst_b) + .expect("Instruction not in layout."); + if block_a == block_b { + layout.pp_cmp(a, b) != Ordering::Greater + } else { + self.block_dominates(block_a, block_b) + } + } + } + } + } + } + + /// Returns `true` if `block_a` dominates `block_b`. + /// + /// A block is considered to dominate itself. + fn block_dominates(&self, block_a: Block, mut block_b: Block) -> bool { + let pre_a = self.nodes[block_a].pre_number; + + // Run a finger up the dominator tree from b until we see a. + // Do nothing if b is unreachable. + while pre_a < self.nodes[block_b].pre_number { + let idom = match self.idom(block_b) { + Some(idom) => idom, + None => return false, // a is unreachable, so we climbed past the entry + }; + block_b = idom; + } + + block_a == block_b + } +} + +impl DominatorTree { + /// Allocate a new blank dominator tree. Use `compute` to compute the dominator tree for a + /// function. + pub fn new() -> Self { + Self { + stree: SpanningTree::new(), + nodes: SecondaryMap::new(), + postorder: Vec::new(), + dfs_worklist: Vec::new(), + eval_worklist: Vec::new(), + valid: false, + } + } + + /// Allocate and compute a dominator tree. 
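+    ///
+    /// A minimal usage sketch (assuming the public `flowgraph` and `dominator_tree`
+    /// module paths):
+    ///
+    /// ```
+    /// # use cranelift_codegen::ir::Function;
+    /// # use cranelift_codegen::flowgraph::ControlFlowGraph;
+    /// # use cranelift_codegen::dominator_tree::DominatorTree;
+    /// let func = Function::new();
+    /// let cfg = ControlFlowGraph::with_function(&func);
+    /// let domtree = DominatorTree::with_function(&func, &cfg);
+    /// assert!(domtree.is_valid());
+    /// ```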
+ pub fn with_function(func: &Function, cfg: &ControlFlowGraph) -> Self { + let block_capacity = func.layout.block_capacity(); + let mut domtree = Self { + stree: SpanningTree::with_capacity(block_capacity), + nodes: SecondaryMap::with_capacity(block_capacity), + postorder: Vec::with_capacity(block_capacity), + dfs_worklist: Vec::new(), + eval_worklist: Vec::new(), + valid: false, + }; + domtree.compute(func, cfg); + domtree + } + + /// Reset and compute a CFG post-order and dominator tree, + /// using Semi-NCA algorithm, described in the paper: + /// + /// Linear-Time Algorithms for Dominators and Related Problems. + /// Loukas Georgiadis, Princeton University, November 2005. + /// + /// The same algorithm is used by Julia, SpiderMonkey and LLVM, + /// the implementation is heavily inspired by them. + pub fn compute(&mut self, func: &Function, cfg: &ControlFlowGraph) { + let _tt = timing::domtree(); + debug_assert!(cfg.is_valid()); + + self.clear(); + self.compute_spanning_tree(func); + self.compute_domtree(cfg); + + self.valid = true; + } + + /// Clear the data structures used to represent the dominator tree. This will leave the tree in + /// a state where `is_valid()` returns false. + pub fn clear(&mut self) { + self.stree.clear(); + self.nodes.clear(); + self.postorder.clear(); + self.valid = false; + } + + /// Check if the dominator tree is in a valid state. + /// + /// Note that this doesn't perform any kind of validity checks. It simply checks if the + /// `compute()` method has been called since the last `clear()`. It does not check that the + /// dominator tree is consistent with the CFG. + pub fn is_valid(&self) -> bool { + self.valid + } + + /// Reset all internal data structures, build spanning tree + /// and compute a post-order of the control flow graph. + fn compute_spanning_tree(&mut self, func: &Function) { + self.nodes.resize(func.dfg.num_blocks()); + self.stree.reserve(func.dfg.num_blocks()); + + if let Some(block) = func.layout.entry_block() { + self.dfs_worklist.push(TraversalEvent::Enter(0, block)); + } + + loop { + match self.dfs_worklist.pop() { + Some(TraversalEvent::Enter(parent, block)) => { + let node = &mut self.nodes[block]; + if node.pre_number != NOT_VISITED { + continue; + } + + self.dfs_worklist.push(TraversalEvent::Exit(block)); + + let pre_number = self.stree.push(parent, block); + node.pre_number = pre_number; + + // Use the same traversal heuristics as in traversals.rs. + self.dfs_worklist.extend( + func.block_successors(block) + // Heuristic: chase the children in reverse. This puts + // the first successor block first in the postorder, all + // other things being equal, which tends to prioritize + // loop backedges over out-edges, putting the edge-block + // closer to the loop body and minimizing live-ranges in + // linear instruction space. This heuristic doesn't have + // any effect on the computation of dominators, and is + // purely for other consumers of the postorder we cache + // here. + .rev() + // A simple optimization: push less items to the stack. + .filter(|successor| self.nodes[*successor].pre_number == NOT_VISITED) + .map(|successor| TraversalEvent::Enter(pre_number, successor)), + ); + } + Some(TraversalEvent::Exit(block)) => self.postorder.push(block), + None => break, + } + } + } + + /// Eval-link procedure from the paper. + /// For a predecessor V of node W returns V if V < W, otherwise the minimum of sdom(U), + /// where U > W and U is on a semi-dominator path for W in CFG. 
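+    /// Only tree nodes whose preorder number is at least `last_linked` (i.e. `W + 1`)
+    /// have been processed so far, so the walk stops at the first ancestor numbered
+    /// below that bound.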
+ /// Use path compression to bring complexity down to O(m*log(n)). + fn eval(&mut self, v: u32, last_linked: u32) -> u32 { + if self.stree[v].ancestor < last_linked { + return self.stree[v].label; + } + + // Follow semi-dominator path. + let mut root = v; + loop { + self.eval_worklist.push(root); + root = self.stree[root].ancestor; + + if self.stree[root].ancestor < last_linked { + break; + } + } + + let mut prev = root; + let root = self.stree[prev].ancestor; + + // Perform path compression. Point all ancestors to the root + // and propagate minimal sdom(U) value from ancestors to children. + while let Some(curr) = self.eval_worklist.pop() { + if self.stree[prev].label < self.stree[curr].label { + self.stree[curr].label = self.stree[prev].label; + } + + self.stree[curr].ancestor = root; + prev = curr; + } + + self.stree[v].label + } + + fn compute_domtree(&mut self, cfg: &ControlFlowGraph) { + // Compute semi-dominators. + for w in (1..self.stree.len() as u32).rev() { + let w_node = &mut self.stree[w]; + let block = w_node.block.expect("Virtual root must have been excluded"); + let mut semi = w_node.ancestor; + + let last_linked = w + 1; + + for pred in cfg + .pred_iter(block) + .map(|pred: BlockPredecessor| pred.block) + { + // Skip unreachable nodes. + if self.nodes[pred].pre_number == NOT_VISITED { + continue; + } + + let semi_candidate = self.eval(self.nodes[pred].pre_number, last_linked); + semi = std::cmp::min(semi, semi_candidate); + } + + let w_node = &mut self.stree[w]; + w_node.label = semi; + w_node.semi = semi; + } + + // Compute immediate dominators. + for v in 1..self.stree.len() as u32 { + let semi = self.stree[v].semi; + let block = self.stree[v] + .block + .expect("Virtual root must have been excluded"); + let mut idom = self.stree[v].idom; + + while idom > semi { + idom = self.stree[idom].idom; + } + + self.stree[v].idom = idom; + + self.nodes[block].idom = self.stree[idom].block; + } + } +} + +/// Optional pre-order information that can be computed for a dominator tree. +/// +/// This data structure is computed from a `DominatorTree` and provides: +/// +/// - A forward traversable dominator tree through the `children()` iterator. +/// - An ordering of blocks according to a dominator tree pre-order. +/// - Constant time dominance checks at the block granularity. +/// +/// The information in this auxiliary data structure is not easy to update when the control flow +/// graph changes, which is why it is kept separate. +pub struct DominatorTreePreorder { + nodes: SecondaryMap, + + // Scratch memory used by `compute_postorder()`. + stack: Vec, +} + +#[derive(Default, Clone)] +struct ExtraNode { + /// First child node in the domtree. + child: PackedOption, + + /// Next sibling node in the domtree. This linked list is ordered according to the CFG RPO. + sibling: PackedOption, + + /// Sequence number for this node in a pre-order traversal of the dominator tree. + /// Unreachable blocks have number 0, the entry block is 1. + pre_number: u32, + + /// Maximum `pre_number` for the sub-tree of the dominator tree that is rooted at this node. + /// This is always >= `pre_number`. + pre_max: u32, +} + +/// Creating and computing the dominator tree pre-order. +impl DominatorTreePreorder { + /// Create a new blank `DominatorTreePreorder`. + pub fn new() -> Self { + Self { + nodes: SecondaryMap::new(), + stack: Vec::new(), + } + } + + /// Recompute this data structure to match `domtree`. 
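+    ///
+    /// A minimal sketch of pairing this with a freshly computed `DominatorTree`
+    /// (assuming the public module paths):
+    ///
+    /// ```
+    /// # use cranelift_codegen::ir::Function;
+    /// # use cranelift_codegen::flowgraph::ControlFlowGraph;
+    /// # use cranelift_codegen::dominator_tree::{DominatorTree, DominatorTreePreorder};
+    /// let func = Function::new();
+    /// let cfg = ControlFlowGraph::with_function(&func);
+    /// let domtree = DominatorTree::with_function(&func, &cfg);
+    /// let mut preorder = DominatorTreePreorder::new();
+    /// preorder.compute(&domtree);
+    /// ```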
+ pub fn compute(&mut self, domtree: &DominatorTree) { + self.nodes.clear(); + + // Step 1: Populate the child and sibling links. + // + // By following the CFG post-order and pushing to the front of the lists, we make sure that + // sibling lists are ordered according to the CFG reverse post-order. + for &block in domtree.cfg_postorder() { + if let Some(idom) = domtree.idom(block) { + let sib = mem::replace(&mut self.nodes[idom].child, block.into()); + self.nodes[block].sibling = sib; + } else { + // The only block without an immediate dominator is the entry. + self.stack.push(block); + } + } + + // Step 2. Assign pre-order numbers from a DFS of the dominator tree. + debug_assert!(self.stack.len() <= 1); + let mut n = 0; + while let Some(block) = self.stack.pop() { + n += 1; + let node = &mut self.nodes[block]; + node.pre_number = n; + node.pre_max = n; + if let Some(n) = node.sibling.expand() { + self.stack.push(n); + } + if let Some(n) = node.child.expand() { + self.stack.push(n); + } + } + + // Step 3. Propagate the `pre_max` numbers up the tree. + // The CFG post-order is topologically ordered w.r.t. dominance so a node comes after all + // its dominator tree children. + for &block in domtree.cfg_postorder() { + if let Some(idom) = domtree.idom(block) { + let pre_max = cmp::max(self.nodes[block].pre_max, self.nodes[idom].pre_max); + self.nodes[idom].pre_max = pre_max; + } + } + } +} + +/// An iterator that enumerates the direct children of a block in the dominator tree. +pub struct ChildIter<'a> { + dtpo: &'a DominatorTreePreorder, + next: PackedOption, +} + +impl<'a> Iterator for ChildIter<'a> { + type Item = Block; + + fn next(&mut self) -> Option { + let n = self.next.expand(); + if let Some(block) = n { + self.next = self.dtpo.nodes[block].sibling; + } + n + } +} + +/// Query interface for the dominator tree pre-order. +impl DominatorTreePreorder { + /// Get an iterator over the direct children of `block` in the dominator tree. + /// + /// These are the block's whose immediate dominator is an instruction in `block`, ordered according + /// to the CFG reverse post-order. + pub fn children(&self, block: Block) -> ChildIter { + ChildIter { + dtpo: self, + next: self.nodes[block].child, + } + } + + /// Fast, constant time dominance check with block granularity. + /// + /// This computes the same result as `domtree.dominates(a, b)`, but in guaranteed fast constant + /// time. This is less general than the `DominatorTree` method because it only works with block + /// program points. + /// + /// A block is considered to dominate itself. + pub fn dominates(&self, a: Block, b: Block) -> bool { + let na = &self.nodes[a]; + let nb = &self.nodes[b]; + na.pre_number <= nb.pre_number && na.pre_max >= nb.pre_max + } + + /// Compare two blocks according to the dominator pre-order. + pub fn pre_cmp_block(&self, a: Block, b: Block) -> Ordering { + self.nodes[a].pre_number.cmp(&self.nodes[b].pre_number) + } + + /// Compare two program points according to the dominator tree pre-order. + /// + /// This ordering of program points have the property that given a program point, pp, all the + /// program points dominated by pp follow immediately and contiguously after pp in the order. 
+ pub fn pre_cmp(&self, a: A, b: B, layout: &Layout) -> Ordering + where + A: Into, + B: Into, + { + let a = a.into(); + let b = b.into(); + self.pre_cmp_block(layout.pp_block(a), layout.pp_block(b)) + .then_with(|| layout.pp_cmp(a, b)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::cursor::{Cursor, FuncCursor}; + use crate::ir::types::*; + use crate::ir::{InstBuilder, TrapCode}; + + #[test] + fn empty() { + let func = Function::new(); + let cfg = ControlFlowGraph::with_function(&func); + debug_assert!(cfg.is_valid()); + let dtree = DominatorTree::with_function(&func, &cfg); + assert_eq!(0, dtree.nodes.keys().count()); + assert_eq!(dtree.cfg_postorder(), &[]); + + let mut dtpo = DominatorTreePreorder::new(); + dtpo.compute(&dtree); + } + + #[test] + fn unreachable_node() { + let mut func = Function::new(); + let block0 = func.dfg.make_block(); + let v0 = func.dfg.append_block_param(block0, I32); + let block1 = func.dfg.make_block(); + let block2 = func.dfg.make_block(); + let trap_block = func.dfg.make_block(); + + let mut cur = FuncCursor::new(&mut func); + + cur.insert_block(block0); + cur.ins().brif(v0, block2, &[], trap_block, &[]); + + cur.insert_block(trap_block); + cur.ins().trap(TrapCode::unwrap_user(1)); + + cur.insert_block(block1); + let v1 = cur.ins().iconst(I32, 1); + let v2 = cur.ins().iadd(v0, v1); + cur.ins().jump(block0, &[v2]); + + cur.insert_block(block2); + cur.ins().return_(&[v0]); + + let cfg = ControlFlowGraph::with_function(cur.func); + let dt = DominatorTree::with_function(cur.func, &cfg); + + // Fall-through-first, prune-at-source DFT: + // + // block0 { + // brif block2 { + // trap + // block2 { + // return + // } block2 + // } block0 + assert_eq!(dt.cfg_postorder(), &[block2, trap_block, block0]); + + let v2_def = cur.func.dfg.value_def(v2).unwrap_inst(); + assert!(!dt.dominates(v2_def, block0, &cur.func.layout)); + assert!(!dt.dominates(block0, v2_def, &cur.func.layout)); + + let mut dtpo = DominatorTreePreorder::new(); + dtpo.compute(&dt); + assert!(dtpo.dominates(block0, block0)); + assert!(!dtpo.dominates(block0, block1)); + assert!(dtpo.dominates(block0, block2)); + assert!(!dtpo.dominates(block1, block0)); + assert!(dtpo.dominates(block1, block1)); + assert!(!dtpo.dominates(block1, block2)); + assert!(!dtpo.dominates(block2, block0)); + assert!(!dtpo.dominates(block2, block1)); + assert!(dtpo.dominates(block2, block2)); + } + + #[test] + fn non_zero_entry_block() { + let mut func = Function::new(); + let block0 = func.dfg.make_block(); + let block1 = func.dfg.make_block(); + let block2 = func.dfg.make_block(); + let block3 = func.dfg.make_block(); + let cond = func.dfg.append_block_param(block3, I32); + + let mut cur = FuncCursor::new(&mut func); + + cur.insert_block(block3); + let jmp_block3_block1 = cur.ins().jump(block1, &[]); + + cur.insert_block(block1); + let br_block1_block0_block2 = cur.ins().brif(cond, block0, &[], block2, &[]); + + cur.insert_block(block2); + cur.ins().jump(block0, &[]); + + cur.insert_block(block0); + + let cfg = ControlFlowGraph::with_function(cur.func); + let dt = DominatorTree::with_function(cur.func, &cfg); + + // Fall-through-first, prune-at-source DFT: + // + // block3 { + // block3:jump block1 { + // block1 { + // block1:brif block0 { + // block1:jump block2 { + // block2 { + // block2:jump block0 (seen) + // } block2 + // } block1:jump block2 + // block0 { + // } block0 + // } block1:brif block0 + // } block1 + // } block3:jump block1 + // } block3 + + assert_eq!(dt.cfg_postorder(), &[block0, block2, 
block1, block3]); + + assert_eq!(cur.func.layout.entry_block().unwrap(), block3); + assert_eq!(dt.idom(block3), None); + assert_eq!(dt.idom(block1).unwrap(), block3); + assert_eq!(dt.idom(block2).unwrap(), block1); + assert_eq!(dt.idom(block0).unwrap(), block1); + + assert!(dt.dominates( + br_block1_block0_block2, + br_block1_block0_block2, + &cur.func.layout + )); + assert!(!dt.dominates(br_block1_block0_block2, jmp_block3_block1, &cur.func.layout)); + assert!(dt.dominates(jmp_block3_block1, br_block1_block0_block2, &cur.func.layout)); + } + + #[test] + fn backwards_layout() { + let mut func = Function::new(); + let block0 = func.dfg.make_block(); + let block1 = func.dfg.make_block(); + let block2 = func.dfg.make_block(); + + let mut cur = FuncCursor::new(&mut func); + + cur.insert_block(block0); + let jmp02 = cur.ins().jump(block2, &[]); + + cur.insert_block(block1); + let trap = cur.ins().trap(TrapCode::unwrap_user(5)); + + cur.insert_block(block2); + let jmp21 = cur.ins().jump(block1, &[]); + + let cfg = ControlFlowGraph::with_function(cur.func); + let dt = DominatorTree::with_function(cur.func, &cfg); + + assert_eq!(cur.func.layout.entry_block(), Some(block0)); + assert_eq!(dt.idom(block0), None); + assert_eq!(dt.idom(block1), Some(block2)); + assert_eq!(dt.idom(block2), Some(block0)); + + assert!(dt.dominates(block0, block0, &cur.func.layout)); + assert!(dt.dominates(block0, jmp02, &cur.func.layout)); + assert!(dt.dominates(block0, block1, &cur.func.layout)); + assert!(dt.dominates(block0, trap, &cur.func.layout)); + assert!(dt.dominates(block0, block2, &cur.func.layout)); + assert!(dt.dominates(block0, jmp21, &cur.func.layout)); + + assert!(!dt.dominates(jmp02, block0, &cur.func.layout)); + assert!(dt.dominates(jmp02, jmp02, &cur.func.layout)); + assert!(dt.dominates(jmp02, block1, &cur.func.layout)); + assert!(dt.dominates(jmp02, trap, &cur.func.layout)); + assert!(dt.dominates(jmp02, block2, &cur.func.layout)); + assert!(dt.dominates(jmp02, jmp21, &cur.func.layout)); + + assert!(!dt.dominates(block1, block0, &cur.func.layout)); + assert!(!dt.dominates(block1, jmp02, &cur.func.layout)); + assert!(dt.dominates(block1, block1, &cur.func.layout)); + assert!(dt.dominates(block1, trap, &cur.func.layout)); + assert!(!dt.dominates(block1, block2, &cur.func.layout)); + assert!(!dt.dominates(block1, jmp21, &cur.func.layout)); + + assert!(!dt.dominates(trap, block0, &cur.func.layout)); + assert!(!dt.dominates(trap, jmp02, &cur.func.layout)); + assert!(!dt.dominates(trap, block1, &cur.func.layout)); + assert!(dt.dominates(trap, trap, &cur.func.layout)); + assert!(!dt.dominates(trap, block2, &cur.func.layout)); + assert!(!dt.dominates(trap, jmp21, &cur.func.layout)); + + assert!(!dt.dominates(block2, block0, &cur.func.layout)); + assert!(!dt.dominates(block2, jmp02, &cur.func.layout)); + assert!(dt.dominates(block2, block1, &cur.func.layout)); + assert!(dt.dominates(block2, trap, &cur.func.layout)); + assert!(dt.dominates(block2, block2, &cur.func.layout)); + assert!(dt.dominates(block2, jmp21, &cur.func.layout)); + + assert!(!dt.dominates(jmp21, block0, &cur.func.layout)); + assert!(!dt.dominates(jmp21, jmp02, &cur.func.layout)); + assert!(dt.dominates(jmp21, block1, &cur.func.layout)); + assert!(dt.dominates(jmp21, trap, &cur.func.layout)); + assert!(!dt.dominates(jmp21, block2, &cur.func.layout)); + assert!(dt.dominates(jmp21, jmp21, &cur.func.layout)); + } + + #[test] + fn insts_same_block() { + let mut func = Function::new(); + let block0 = func.dfg.make_block(); + + let mut cur = 
FuncCursor::new(&mut func); + + cur.insert_block(block0); + let v1 = cur.ins().iconst(I32, 1); + let v2 = cur.ins().iadd(v1, v1); + let v3 = cur.ins().iadd(v2, v2); + cur.ins().return_(&[]); + + let cfg = ControlFlowGraph::with_function(cur.func); + let dt = DominatorTree::with_function(cur.func, &cfg); + + let v1_def = cur.func.dfg.value_def(v1).unwrap_inst(); + let v2_def = cur.func.dfg.value_def(v2).unwrap_inst(); + let v3_def = cur.func.dfg.value_def(v3).unwrap_inst(); + + assert!(dt.dominates(v1_def, v2_def, &cur.func.layout)); + assert!(dt.dominates(v2_def, v3_def, &cur.func.layout)); + assert!(dt.dominates(v1_def, v3_def, &cur.func.layout)); + + assert!(!dt.dominates(v2_def, v1_def, &cur.func.layout)); + assert!(!dt.dominates(v3_def, v2_def, &cur.func.layout)); + assert!(!dt.dominates(v3_def, v1_def, &cur.func.layout)); + + assert!(dt.dominates(v2_def, v2_def, &cur.func.layout)); + assert!(dt.dominates(block0, block0, &cur.func.layout)); + + assert!(dt.dominates(block0, v1_def, &cur.func.layout)); + assert!(dt.dominates(block0, v2_def, &cur.func.layout)); + assert!(dt.dominates(block0, v3_def, &cur.func.layout)); + + assert!(!dt.dominates(v1_def, block0, &cur.func.layout)); + assert!(!dt.dominates(v2_def, block0, &cur.func.layout)); + assert!(!dt.dominates(v3_def, block0, &cur.func.layout)); + } +} diff --git a/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/dominator_tree/simple.rs b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/dominator_tree/simple.rs new file mode 100644 index 000000000..d7117cbc0 --- /dev/null +++ b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/dominator_tree/simple.rs @@ -0,0 +1,584 @@ +//! A Dominator Tree represented as mappings of Blocks to their immediate dominator. +//! Computed using Keith D. Cooper's "Simple, Fast Dominator Algorithm." +//! This version have been used in Cranelift for a very long time +//! and should be quite stable. Used as a baseline i.e. in verification. + +use crate::entity::SecondaryMap; +use crate::flowgraph::{BlockPredecessor, ControlFlowGraph}; +use crate::ir::{Block, Function, Layout, ProgramPoint}; +use crate::packed_option::PackedOption; +use crate::timing; +use crate::traversals::Dfs; +use alloc::vec::Vec; +use core::cmp::Ordering; + +/// RPO numbers are not first assigned in a contiguous way but as multiples of STRIDE, to leave +/// room for modifications of the dominator tree. +const STRIDE: u32 = 4; + +/// Dominator tree node. We keep one of these per block. +#[derive(Clone, Default)] +struct DomNode { + /// Number of this node in a reverse post-order traversal of the CFG, starting from 1. + /// This number is monotonic in the reverse postorder but not contiguous, since we leave + /// holes for later localized modifications of the dominator tree. + /// Unreachable nodes get number 0, all others are positive. + rpo_number: u32, + + /// The immediate dominator of this block. + /// + /// This is `None` for unreachable blocks and the entry block which doesn't have an immediate + /// dominator. + idom: PackedOption, +} + +/// The dominator tree for a single function. +pub struct SimpleDominatorTree { + nodes: SecondaryMap, + + /// CFG post-order of all reachable blocks. + postorder: Vec, + + /// Scratch traversal state used by `compute_postorder()`. + dfs: Dfs, + + valid: bool, +} + +/// Methods for querying the dominator tree. +impl SimpleDominatorTree { + /// Is `block` reachable from the entry block? 
+ pub fn is_reachable(&self, block: Block) -> bool { + self.nodes[block].rpo_number != 0 + } + + /// Get the CFG post-order of blocks that was used to compute the dominator tree. + /// + /// Note that this post-order is not updated automatically when the CFG is modified. It is + /// computed from scratch and cached by `compute()`. + pub fn cfg_postorder(&self) -> &[Block] { + debug_assert!(self.is_valid()); + &self.postorder + } + + /// Returns the immediate dominator of `block`. + /// + /// `block_a` is said to *dominate* `block_b` if all control flow paths from the function + /// entry to `block_b` must go through `block_a`. + /// + /// The *immediate dominator* is the dominator that is closest to `block`. All other dominators + /// also dominate the immediate dominator. + /// + /// This returns `None` if `block` is not reachable from the entry block, or if it is the entry block + /// which has no dominators. + pub fn idom(&self, block: Block) -> Option { + self.nodes[block].idom.into() + } + + /// Compare two blocks relative to the reverse post-order. + pub fn rpo_cmp_block(&self, a: Block, b: Block) -> Ordering { + self.nodes[a].rpo_number.cmp(&self.nodes[b].rpo_number) + } + + /// Compare two program points relative to a reverse post-order traversal of the control-flow + /// graph. + /// + /// Return `Ordering::Less` if `a` comes before `b` in the RPO. + /// + /// If `a` and `b` belong to the same block, compare their relative position in the block. + pub fn rpo_cmp(&self, a: A, b: B, layout: &Layout) -> Ordering + where + A: Into, + B: Into, + { + let a = a.into(); + let b = b.into(); + self.rpo_cmp_block(layout.pp_block(a), layout.pp_block(b)) + .then_with(|| layout.pp_cmp(a, b)) + } + + /// Returns `true` if `a` dominates `b`. + /// + /// This means that every control-flow path from the function entry to `b` must go through `a`. + /// + /// Dominance is ill defined for unreachable blocks. This function can always determine + /// dominance for instructions in the same block, but otherwise returns `false` if either block + /// is unreachable. + /// + /// An instruction is considered to dominate itself. + /// A block is also considered to dominate itself. + pub fn dominates(&self, a: A, b: B, layout: &Layout) -> bool + where + A: Into, + B: Into, + { + let a = a.into(); + let b = b.into(); + match a { + ProgramPoint::Block(block_a) => match b { + ProgramPoint::Block(block_b) => self.block_dominates(block_a, block_b), + ProgramPoint::Inst(inst_b) => { + let block_b = layout + .inst_block(inst_b) + .expect("Instruction not in layout."); + self.block_dominates(block_a, block_b) + } + }, + ProgramPoint::Inst(inst_a) => { + let block_a: Block = layout + .inst_block(inst_a) + .expect("Instruction not in layout."); + match b { + ProgramPoint::Block(block_b) => { + block_a != block_b && self.block_dominates(block_a, block_b) + } + ProgramPoint::Inst(inst_b) => { + let block_b = layout + .inst_block(inst_b) + .expect("Instruction not in layout."); + if block_a == block_b { + layout.pp_cmp(a, b) != Ordering::Greater + } else { + self.block_dominates(block_a, block_b) + } + } + } + } + } + } + + /// Returns `true` if `block_a` dominates `block_b`. + /// + /// A block is considered to dominate itself. + fn block_dominates(&self, block_a: Block, mut block_b: Block) -> bool { + let rpo_a = self.nodes[block_a].rpo_number; + + // Run a finger up the dominator tree from b until we see a. + // Do nothing if b is unreachable. 
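+        // Each step to an immediate dominator strictly decreases `rpo_number`, so
+        // this loop terminates.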
+ while rpo_a < self.nodes[block_b].rpo_number { + let idom = match self.idom(block_b) { + Some(idom) => idom, + None => return false, // a is unreachable, so we climbed past the entry + }; + block_b = idom; + } + + block_a == block_b + } + + /// Compute the common dominator of two basic blocks. + /// + /// Both basic blocks are assumed to be reachable. + fn common_dominator(&self, mut a: Block, mut b: Block) -> Block { + loop { + match self.rpo_cmp_block(a, b) { + Ordering::Less => { + // `a` comes before `b` in the RPO. Move `b` up. + let idom = self.nodes[b].idom.expect("Unreachable basic block?"); + b = idom; + } + Ordering::Greater => { + // `b` comes before `a` in the RPO. Move `a` up. + let idom = self.nodes[a].idom.expect("Unreachable basic block?"); + a = idom; + } + Ordering::Equal => break, + } + } + + debug_assert_eq!(a, b, "Unreachable block passed to common_dominator?"); + + a + } +} + +impl SimpleDominatorTree { + /// Allocate a new blank dominator tree. Use `compute` to compute the dominator tree for a + /// function. + pub fn new() -> Self { + Self { + nodes: SecondaryMap::new(), + postorder: Vec::new(), + dfs: Dfs::new(), + valid: false, + } + } + + /// Allocate and compute a dominator tree. + pub fn with_function(func: &Function, cfg: &ControlFlowGraph) -> Self { + let block_capacity = func.layout.block_capacity(); + let mut domtree = Self { + nodes: SecondaryMap::with_capacity(block_capacity), + postorder: Vec::with_capacity(block_capacity), + dfs: Dfs::new(), + valid: false, + }; + domtree.compute(func, cfg); + domtree + } + + /// Reset and compute a CFG post-order and dominator tree. + pub fn compute(&mut self, func: &Function, cfg: &ControlFlowGraph) { + let _tt = timing::domtree(); + debug_assert!(cfg.is_valid()); + self.compute_postorder(func); + self.compute_domtree(func, cfg); + self.valid = true; + } + + /// Clear the data structures used to represent the dominator tree. This will leave the tree in + /// a state where `is_valid()` returns false. + pub fn clear(&mut self) { + self.nodes.clear(); + self.postorder.clear(); + self.valid = false; + } + + /// Check if the dominator tree is in a valid state. + /// + /// Note that this doesn't perform any kind of validity checks. It simply checks if the + /// `compute()` method has been called since the last `clear()`. It does not check that the + /// dominator tree is consistent with the CFG. + pub fn is_valid(&self) -> bool { + self.valid + } + + /// Reset all internal data structures and compute a post-order of the control flow graph. + /// + /// This leaves `rpo_number == 1` for all reachable blocks, 0 for unreachable ones. + fn compute_postorder(&mut self, func: &Function) { + self.clear(); + self.nodes.resize(func.dfg.num_blocks()); + self.postorder.extend(self.dfs.post_order_iter(func)); + } + + /// Build a dominator tree from a control flow graph using Keith D. Cooper's + /// "Simple, Fast Dominator Algorithm." + fn compute_domtree(&mut self, func: &Function, cfg: &ControlFlowGraph) { + // During this algorithm, `rpo_number` has the following values: + // + // 0: block is not reachable. + // 1: block is reachable, but has not yet been visited during the first pass. This is set by + // `compute_postorder`. + // 2+: block is reachable and has an assigned RPO number. + + // We'll be iterating over a reverse post-order of the CFG, skipping the entry block. 
+ let (entry_block, postorder) = match self.postorder.as_slice().split_last() { + Some((&eb, rest)) => (eb, rest), + None => return, + }; + debug_assert_eq!(Some(entry_block), func.layout.entry_block()); + + // Do a first pass where we assign RPO numbers to all reachable nodes. + self.nodes[entry_block].rpo_number = 2 * STRIDE; + for (rpo_idx, &block) in postorder.iter().rev().enumerate() { + // Update the current node and give it an RPO number. + // The entry block got 2, the rest start at 3 by multiples of STRIDE to leave + // room for future dominator tree modifications. + // + // Since `compute_idom` will only look at nodes with an assigned RPO number, the + // function will never see an uninitialized predecessor. + // + // Due to the nature of the post-order traversal, every node we visit will have at + // least one predecessor that has previously been visited during this RPO. + self.nodes[block] = DomNode { + idom: self.compute_idom(block, cfg).into(), + rpo_number: (rpo_idx as u32 + 3) * STRIDE, + } + } + + // Now that we have RPO numbers for everything and initial immediate dominator estimates, + // iterate until convergence. + // + // If the function is free of irreducible control flow, this will exit after one iteration. + let mut changed = true; + while changed { + changed = false; + for &block in postorder.iter().rev() { + let idom = self.compute_idom(block, cfg).into(); + if self.nodes[block].idom != idom { + self.nodes[block].idom = idom; + changed = true; + } + } + } + } + + // Compute the immediate dominator for `block` using the current `idom` states for the reachable + // nodes. + fn compute_idom(&self, block: Block, cfg: &ControlFlowGraph) -> Block { + // Get an iterator with just the reachable, already visited predecessors to `block`. + // Note that during the first pass, `rpo_number` is 1 for reachable blocks that haven't + // been visited yet, 0 for unreachable blocks. + let mut reachable_preds = cfg + .pred_iter(block) + .filter(|&BlockPredecessor { block: pred, .. }| self.nodes[pred].rpo_number > 1) + .map(|pred| pred.block); + + // The RPO must visit at least one predecessor before this node. 
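// Editor's aside: an illustrative, standalone version of the predecessor fold performed
// just below, using the two-finger intersection from Cooper, Harvey and Kennedy's
// "A Simple, Fast Dominance Algorithm". The encoding here is hypothetical (plain indices,
// `rpo` numbers where the entry is smallest, `idom[entry] == entry`); it is not the
// vendored implementation.
fn sketch_intersect(rpo: &[u32], idom: &[usize], mut a: usize, mut b: usize) -> usize {
    while a != b {
        // Move whichever finger is later in the reverse post-order up to its current
        // immediate-dominator estimate; both fingers converge on the common dominator.
        while rpo[a] > rpo[b] {
            a = idom[a];
        }
        while rpo[b] > rpo[a] {
            b = idom[b];
        }
    }
    a
}

fn sketch_compute_idom(rpo: &[u32], idom: &[usize], preds: &[usize]) -> usize {
    // Only predecessors that already carry an RPO number participate; reverse post-order
    // guarantees at least one such predecessor exists for every non-entry block.
    let mut seen = preds.iter().copied().filter(|&p| rpo[p] != 0);
    let first = seen.next().expect("non-entry block must have a numbered predecessor");
    seen.fold(first, |acc, p| sketch_intersect(rpo, idom, acc, p))
}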
+ let mut idom = reachable_preds + .next() + .expect("block node must have one reachable predecessor"); + + for pred in reachable_preds { + idom = self.common_dominator(idom, pred); + } + + idom + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::cursor::{Cursor, FuncCursor}; + use crate::ir::types::*; + use crate::ir::{InstBuilder, TrapCode}; + + #[test] + fn empty() { + let func = Function::new(); + let cfg = ControlFlowGraph::with_function(&func); + debug_assert!(cfg.is_valid()); + let dtree = SimpleDominatorTree::with_function(&func, &cfg); + assert_eq!(0, dtree.nodes.keys().count()); + assert_eq!(dtree.cfg_postorder(), &[]); + } + + #[test] + fn unreachable_node() { + let mut func = Function::new(); + let block0 = func.dfg.make_block(); + let v0 = func.dfg.append_block_param(block0, I32); + let block1 = func.dfg.make_block(); + let block2 = func.dfg.make_block(); + let trap_block = func.dfg.make_block(); + + let mut cur = FuncCursor::new(&mut func); + + cur.insert_block(block0); + cur.ins().brif(v0, block2, &[], trap_block, &[]); + + cur.insert_block(trap_block); + cur.ins().trap(TrapCode::unwrap_user(1)); + + cur.insert_block(block1); + let v1 = cur.ins().iconst(I32, 1); + let v2 = cur.ins().iadd(v0, v1); + cur.ins().jump(block0, &[v2]); + + cur.insert_block(block2); + cur.ins().return_(&[v0]); + + let cfg = ControlFlowGraph::with_function(cur.func); + let dt = SimpleDominatorTree::with_function(cur.func, &cfg); + + // Fall-through-first, prune-at-source DFT: + // + // block0 { + // brif block2 { + // trap + // block2 { + // return + // } block2 + // } block0 + assert_eq!(dt.cfg_postorder(), &[block2, trap_block, block0]); + + let v2_def = cur.func.dfg.value_def(v2).unwrap_inst(); + assert!(!dt.dominates(v2_def, block0, &cur.func.layout)); + assert!(!dt.dominates(block0, v2_def, &cur.func.layout)); + + assert!(dt.dominates(block0, block0, &cur.func.layout)); + assert!(!dt.dominates(block0, block1, &cur.func.layout)); + assert!(dt.dominates(block0, block2, &cur.func.layout)); + assert!(!dt.dominates(block1, block0, &cur.func.layout)); + assert!(dt.dominates(block1, block1, &cur.func.layout)); + assert!(!dt.dominates(block1, block2, &cur.func.layout)); + assert!(!dt.dominates(block2, block0, &cur.func.layout)); + assert!(!dt.dominates(block2, block1, &cur.func.layout)); + assert!(dt.dominates(block2, block2, &cur.func.layout)); + } + + #[test] + fn non_zero_entry_block() { + let mut func = Function::new(); + let block0 = func.dfg.make_block(); + let block1 = func.dfg.make_block(); + let block2 = func.dfg.make_block(); + let block3 = func.dfg.make_block(); + let cond = func.dfg.append_block_param(block3, I32); + + let mut cur = FuncCursor::new(&mut func); + + cur.insert_block(block3); + let jmp_block3_block1 = cur.ins().jump(block1, &[]); + + cur.insert_block(block1); + let br_block1_block0_block2 = cur.ins().brif(cond, block0, &[], block2, &[]); + + cur.insert_block(block2); + cur.ins().jump(block0, &[]); + + cur.insert_block(block0); + + let cfg = ControlFlowGraph::with_function(cur.func); + let dt = SimpleDominatorTree::with_function(cur.func, &cfg); + + // Fall-through-first, prune-at-source DFT: + // + // block3 { + // block3:jump block1 { + // block1 { + // block1:brif block0 { + // block1:jump block2 { + // block2 { + // block2:jump block0 (seen) + // } block2 + // } block1:jump block2 + // block0 { + // } block0 + // } block1:brif block0 + // } block1 + // } block3:jump block1 + // } block3 + + assert_eq!(dt.cfg_postorder(), &[block0, block2, block1, block3]); + + 
assert_eq!(cur.func.layout.entry_block().unwrap(), block3); + assert_eq!(dt.idom(block3), None); + assert_eq!(dt.idom(block1).unwrap(), block3); + assert_eq!(dt.idom(block2).unwrap(), block1); + assert_eq!(dt.idom(block0).unwrap(), block1); + + assert!(dt.dominates( + br_block1_block0_block2, + br_block1_block0_block2, + &cur.func.layout + )); + assert!(!dt.dominates(br_block1_block0_block2, jmp_block3_block1, &cur.func.layout)); + assert!(dt.dominates(jmp_block3_block1, br_block1_block0_block2, &cur.func.layout)); + + assert_eq!( + dt.rpo_cmp(block3, block3, &cur.func.layout), + Ordering::Equal + ); + assert_eq!(dt.rpo_cmp(block3, block1, &cur.func.layout), Ordering::Less); + assert_eq!( + dt.rpo_cmp(block3, jmp_block3_block1, &cur.func.layout), + Ordering::Less + ); + assert_eq!( + dt.rpo_cmp(jmp_block3_block1, br_block1_block0_block2, &cur.func.layout), + Ordering::Less + ); + } + + #[test] + fn backwards_layout() { + let mut func = Function::new(); + let block0 = func.dfg.make_block(); + let block1 = func.dfg.make_block(); + let block2 = func.dfg.make_block(); + + let mut cur = FuncCursor::new(&mut func); + + cur.insert_block(block0); + let jmp02 = cur.ins().jump(block2, &[]); + + cur.insert_block(block1); + let trap = cur.ins().trap(TrapCode::unwrap_user(5)); + + cur.insert_block(block2); + let jmp21 = cur.ins().jump(block1, &[]); + + let cfg = ControlFlowGraph::with_function(cur.func); + let dt = SimpleDominatorTree::with_function(cur.func, &cfg); + + assert_eq!(cur.func.layout.entry_block(), Some(block0)); + assert_eq!(dt.idom(block0), None); + assert_eq!(dt.idom(block1), Some(block2)); + assert_eq!(dt.idom(block2), Some(block0)); + + assert!(dt.dominates(block0, block0, &cur.func.layout)); + assert!(dt.dominates(block0, jmp02, &cur.func.layout)); + assert!(dt.dominates(block0, block1, &cur.func.layout)); + assert!(dt.dominates(block0, trap, &cur.func.layout)); + assert!(dt.dominates(block0, block2, &cur.func.layout)); + assert!(dt.dominates(block0, jmp21, &cur.func.layout)); + + assert!(!dt.dominates(jmp02, block0, &cur.func.layout)); + assert!(dt.dominates(jmp02, jmp02, &cur.func.layout)); + assert!(dt.dominates(jmp02, block1, &cur.func.layout)); + assert!(dt.dominates(jmp02, trap, &cur.func.layout)); + assert!(dt.dominates(jmp02, block2, &cur.func.layout)); + assert!(dt.dominates(jmp02, jmp21, &cur.func.layout)); + + assert!(!dt.dominates(block1, block0, &cur.func.layout)); + assert!(!dt.dominates(block1, jmp02, &cur.func.layout)); + assert!(dt.dominates(block1, block1, &cur.func.layout)); + assert!(dt.dominates(block1, trap, &cur.func.layout)); + assert!(!dt.dominates(block1, block2, &cur.func.layout)); + assert!(!dt.dominates(block1, jmp21, &cur.func.layout)); + + assert!(!dt.dominates(trap, block0, &cur.func.layout)); + assert!(!dt.dominates(trap, jmp02, &cur.func.layout)); + assert!(!dt.dominates(trap, block1, &cur.func.layout)); + assert!(dt.dominates(trap, trap, &cur.func.layout)); + assert!(!dt.dominates(trap, block2, &cur.func.layout)); + assert!(!dt.dominates(trap, jmp21, &cur.func.layout)); + + assert!(!dt.dominates(block2, block0, &cur.func.layout)); + assert!(!dt.dominates(block2, jmp02, &cur.func.layout)); + assert!(dt.dominates(block2, block1, &cur.func.layout)); + assert!(dt.dominates(block2, trap, &cur.func.layout)); + assert!(dt.dominates(block2, block2, &cur.func.layout)); + assert!(dt.dominates(block2, jmp21, &cur.func.layout)); + + assert!(!dt.dominates(jmp21, block0, &cur.func.layout)); + assert!(!dt.dominates(jmp21, jmp02, &cur.func.layout)); + 
assert!(dt.dominates(jmp21, block1, &cur.func.layout)); + assert!(dt.dominates(jmp21, trap, &cur.func.layout)); + assert!(!dt.dominates(jmp21, block2, &cur.func.layout)); + assert!(dt.dominates(jmp21, jmp21, &cur.func.layout)); + } + + #[test] + fn insts_same_block() { + let mut func = Function::new(); + let block0 = func.dfg.make_block(); + + let mut cur = FuncCursor::new(&mut func); + + cur.insert_block(block0); + let v1 = cur.ins().iconst(I32, 1); + let v2 = cur.ins().iadd(v1, v1); + let v3 = cur.ins().iadd(v2, v2); + cur.ins().return_(&[]); + + let cfg = ControlFlowGraph::with_function(cur.func); + let dt = SimpleDominatorTree::with_function(cur.func, &cfg); + + let v1_def = cur.func.dfg.value_def(v1).unwrap_inst(); + let v2_def = cur.func.dfg.value_def(v2).unwrap_inst(); + let v3_def = cur.func.dfg.value_def(v3).unwrap_inst(); + + assert!(dt.dominates(v1_def, v2_def, &cur.func.layout)); + assert!(dt.dominates(v2_def, v3_def, &cur.func.layout)); + assert!(dt.dominates(v1_def, v3_def, &cur.func.layout)); + + assert!(!dt.dominates(v2_def, v1_def, &cur.func.layout)); + assert!(!dt.dominates(v3_def, v2_def, &cur.func.layout)); + assert!(!dt.dominates(v3_def, v1_def, &cur.func.layout)); + + assert!(dt.dominates(v2_def, v2_def, &cur.func.layout)); + assert!(dt.dominates(block0, block0, &cur.func.layout)); + + assert!(dt.dominates(block0, v1_def, &cur.func.layout)); + assert!(dt.dominates(block0, v2_def, &cur.func.layout)); + assert!(dt.dominates(block0, v3_def, &cur.func.layout)); + + assert!(!dt.dominates(v1_def, block0, &cur.func.layout)); + assert!(!dt.dominates(v2_def, block0, &cur.func.layout)); + assert!(!dt.dominates(v3_def, block0, &cur.func.layout)); + } +} diff --git a/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/egraph.rs b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/egraph.rs new file mode 100644 index 000000000..1aac6c3b8 --- /dev/null +++ b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/egraph.rs @@ -0,0 +1,833 @@ +//! Support for egraphs represented in the DataFlowGraph. + +use crate::alias_analysis::{AliasAnalysis, LastStores}; +use crate::ctxhash::{CtxEq, CtxHash, NullCtx}; +use crate::cursor::{Cursor, CursorPosition, FuncCursor}; +use crate::dominator_tree::{DominatorTree, DominatorTreePreorder}; +use crate::egraph::elaborate::Elaborator; +use crate::inst_predicates::{is_mergeable_for_egraph, is_pure_for_egraph}; +use crate::ir::pcc::Fact; +use crate::ir::{ + Block, DataFlowGraph, Function, Inst, InstructionData, Opcode, Type, Value, ValueDef, + ValueListPool, +}; +use crate::loop_analysis::LoopAnalysis; +use crate::opts::IsleContext; +use crate::scoped_hash_map::{Entry as ScopedEntry, ScopedHashMap}; +use crate::settings::Flags; +use crate::trace; +use alloc::vec::Vec; +use core::cmp::Ordering; +use core::hash::Hasher; +use cranelift_control::ControlPlane; +use cranelift_entity::packed_option::ReservedValue; +use cranelift_entity::SecondaryMap; +use rustc_hash::FxHashSet; +use smallvec::SmallVec; + +mod cost; +mod elaborate; + +/// Pass over a Function that does the whole aegraph thing. +/// +/// - Removes non-skeleton nodes from the Layout. +/// - Performs a GVN-and-rule-application pass over all Values +/// reachable from the skeleton, potentially creating new Union +/// nodes (i.e., an aegraph) so that some values have multiple +/// representations. +/// - Does "extraction" on the aegraph: selects the best value out of +/// the tree-of-Union nodes for each used value. 
+/// - Does "scoped elaboration" on the aegraph: chooses one or more +/// locations for pure nodes to become instructions again in the +/// layout, as forced by the skeleton. +/// +/// At the beginning and end of this pass, the CLIF should be in a +/// state that passes the verifier and, additionally, has no Union +/// nodes. During the pass, Union nodes may exist, and instructions in +/// the layout may refer to results of instructions that are not +/// placed in the layout. +pub struct EgraphPass<'a> { + /// The function we're operating on. + func: &'a mut Function, + /// Dominator tree for the CFG, used to visit blocks in pre-order + /// so we see value definitions before their uses, and also used for + /// O(1) dominance checks. + domtree: DominatorTreePreorder, + /// Alias analysis, used during optimization. + alias_analysis: &'a mut AliasAnalysis<'a>, + /// Loop analysis results, used for built-in LICM during + /// elaboration. + loop_analysis: &'a LoopAnalysis, + /// Compiler flags. + flags: &'a Flags, + /// Chaos-mode control-plane so we can test that we still get + /// correct results when our heuristics make bad decisions. + ctrl_plane: &'a mut ControlPlane, + /// Which Values do we want to rematerialize in each block where + /// they're used? + remat_values: FxHashSet, + /// Stats collected while we run this pass. + pub(crate) stats: Stats, +} + +// The maximum number of rewrites we will take from a single call into ISLE. +const MATCHES_LIMIT: usize = 5; + +/// Context passed through node insertion and optimization. +pub(crate) struct OptimizeCtx<'opt, 'analysis> +where + 'analysis: 'opt, +{ + // Borrowed from EgraphPass: + pub(crate) func: &'opt mut Function, + pub(crate) value_to_opt_value: &'opt mut SecondaryMap, + available_block: &'opt mut SecondaryMap, + pub(crate) gvn_map: &'opt mut ScopedHashMap<(Type, InstructionData), Option>, + pub(crate) gvn_map_blocks: &'opt Vec, + pub(crate) remat_values: &'opt mut FxHashSet, + pub(crate) stats: &'opt mut Stats, + domtree: &'opt DominatorTreePreorder, + pub(crate) alias_analysis: &'opt mut AliasAnalysis<'analysis>, + pub(crate) alias_analysis_state: &'opt mut LastStores, + flags: &'opt Flags, + ctrl_plane: &'opt mut ControlPlane, + // Held locally during optimization of one node (recursively): + pub(crate) rewrite_depth: usize, + pub(crate) subsume_values: FxHashSet, + optimized_values: SmallVec<[Value; MATCHES_LIMIT]>, +} + +/// For passing to `insert_pure_enode`. Sometimes the enode already +/// exists as an Inst (from the original CLIF), and sometimes we're in +/// the middle of creating it and want to avoid inserting it if +/// possible until we know we need it. +pub(crate) enum NewOrExistingInst { + New(InstructionData, Type), + Existing(Inst), +} + +impl NewOrExistingInst { + fn get_inst_key<'a>(&'a self, dfg: &'a DataFlowGraph) -> (Type, InstructionData) { + match self { + NewOrExistingInst::New(data, ty) => (*ty, *data), + NewOrExistingInst::Existing(inst) => { + let ty = dfg.ctrl_typevar(*inst); + (ty, dfg.insts[*inst]) + } + } + } +} + +impl<'opt, 'analysis> OptimizeCtx<'opt, 'analysis> +where + 'analysis: 'opt, +{ + /// Optimization of a single instruction. + /// + /// This does a few things: + /// - Looks up the instruction in the GVN deduplication map. If we + /// already have the same instruction somewhere else, with the + /// same args, then we can alias the original instruction's + /// results and omit this instruction entirely. 
+ /// - If the instruction is "new" (not deduplicated), then apply + /// optimization rules: + /// - All of the mid-end rules written in ISLE. + /// - Store-to-load forwarding. + /// - Update the value-to-opt-value map, and update the eclass + /// union-find, if we rewrote the value to different form(s). + pub(crate) fn insert_pure_enode(&mut self, inst: NewOrExistingInst) -> Value { + // Create the external context for looking up and updating the + // GVN map. This is necessary so that instructions themselves + // do not have to carry all the references or data for a full + // `Eq` or `Hash` impl. + let gvn_context = GVNContext { + value_lists: &self.func.dfg.value_lists, + }; + + self.stats.pure_inst += 1; + if let NewOrExistingInst::New(..) = inst { + self.stats.new_inst += 1; + } + + // Does this instruction already exist? If so, add entries to + // the value-map to rewrite uses of its results to the results + // of the original (existing) instruction. If not, optimize + // the new instruction. + if let Some(&Some(orig_result)) = self + .gvn_map + .get(&gvn_context, &inst.get_inst_key(&self.func.dfg)) + { + self.stats.pure_inst_deduped += 1; + if let NewOrExistingInst::Existing(inst) = inst { + debug_assert_eq!(self.func.dfg.inst_results(inst).len(), 1); + let result = self.func.dfg.first_result(inst); + self.value_to_opt_value[result] = orig_result; + self.available_block[result] = self.available_block[orig_result]; + self.func.dfg.merge_facts(result, orig_result); + } + orig_result + } else { + // Now actually insert the InstructionData and attach + // result value (exactly one). + let (inst, result, ty) = match inst { + NewOrExistingInst::New(data, typevar) => { + self.stats.pure_inst_insert_new += 1; + let inst = self.func.dfg.make_inst(data); + // TODO: reuse return value? + self.func.dfg.make_inst_results(inst, typevar); + let result = self.func.dfg.first_result(inst); + // New inst. We need to do the analysis of its result. + (inst, result, typevar) + } + NewOrExistingInst::Existing(inst) => { + self.stats.pure_inst_insert_orig += 1; + let result = self.func.dfg.first_result(inst); + let ty = self.func.dfg.ctrl_typevar(inst); + (inst, result, ty) + } + }; + + self.attach_constant_fact(inst, result, ty); + + self.available_block[result] = self.get_available_block(inst); + let opt_value = self.optimize_pure_enode(inst); + log::trace!( + "optimizing inst {} orig result {} gave {}", + inst, + result, + opt_value + ); + + let gvn_context = GVNContext { + value_lists: &self.func.dfg.value_lists, + }; + // Insert at level implied by args. This enables merging + // in LICM cases like: + // + // while (...) { + // if (...) { + // let x = loop_invariant_expr; + // } + // if (...) { + // let x = loop_invariant_expr; + // } + // } + // + // where the two instances of the expression otherwise + // wouldn't merge because each would be in a separate + // subscope of the scoped hashmap during traversal. + log::trace!( + "value {} is available at {}", + opt_value, + self.available_block[opt_value] + ); + let depth = self.depth_of_block_in_gvn_map(self.available_block[opt_value]); + self.gvn_map.insert_with_depth( + &gvn_context, + (ty, self.func.dfg.insts[inst]), + Some(opt_value), + depth, + ); + self.value_to_opt_value[result] = opt_value; + opt_value + } + } + + /// Find the block where a pure instruction first becomes available, + /// defined as the block that is closest to the root where all of + /// its arguments are available. 
In the unusual case where a pure + /// instruction has no arguments (e.g. get_return_address), we can + /// place it anywhere, so it is available in the entry block. + /// + /// This function does not compute available blocks recursively. + /// All of the instruction's arguments must have had their available + /// blocks assigned already. + fn get_available_block(&self, inst: Inst) -> Block { + // Side-effecting instructions have different rules for where + // they become available, so this function does not apply. + debug_assert!(is_pure_for_egraph(self.func, inst)); + + // Note that the def-point of all arguments to an instruction + // in SSA lie on a line of direct ancestors in the domtree, and + // so do their available-blocks. This means that for any pair of + // arguments, their available blocks are either the same or one + // strictly dominates the other. We just need to find any argument + // whose available block is deepest in the domtree. + self.func.dfg.insts[inst] + .arguments(&self.func.dfg.value_lists) + .iter() + .map(|&v| { + let block = self.available_block[v]; + debug_assert!(!block.is_reserved_value()); + block + }) + .max_by(|&x, &y| { + if self.domtree.dominates(x, y) { + Ordering::Less + } else { + debug_assert!(self.domtree.dominates(y, x)); + Ordering::Greater + } + }) + .unwrap_or(self.func.layout.entry_block().unwrap()) + } + + fn depth_of_block_in_gvn_map(&self, block: Block) -> usize { + log::trace!( + "finding depth of available block {} in domtree stack: {:?}", + block, + self.gvn_map_blocks + ); + self.gvn_map_blocks + .iter() + .enumerate() + .rev() + .find(|&(_, b)| *b == block) + .unwrap() + .0 + } + + /// Optimizes an enode by applying any matching mid-end rewrite + /// rules (or store-to-load forwarding, which is a special case), + /// unioning together all possible optimized (or rewritten) forms + /// of this expression into an eclass and returning the `Value` + /// that represents that eclass. + fn optimize_pure_enode(&mut self, inst: Inst) -> Value { + // A pure node always has exactly one result. + let orig_value = self.func.dfg.first_result(inst); + + let mut optimized_values = std::mem::take(&mut self.optimized_values); + + // Limit rewrite depth. When we apply optimization rules, they + // may create new nodes (values) and those are, recursively, + // optimized eagerly as soon as they are created. So we may + // have more than one ISLE invocation on the stack. (This is + // necessary so that as the toplevel builds the + // right-hand-side expression bottom-up, it uses the "latest" + // optimized values for all the constituent parts.) To avoid + // infinite or problematic recursion, we bound the rewrite + // depth to a small constant here. + const REWRITE_LIMIT: usize = 5; + if self.rewrite_depth > REWRITE_LIMIT { + self.stats.rewrite_depth_limit += 1; + return orig_value; + } + self.rewrite_depth += 1; + trace!("Incrementing rewrite depth; now {}", self.rewrite_depth); + + // Invoke the ISLE toplevel constructor, getting all new + // values produced as equivalents to this value. + trace!("Calling into ISLE with original value {}", orig_value); + self.stats.rewrite_rule_invoked += 1; + debug_assert!(optimized_values.is_empty()); + crate::opts::generated_code::constructor_simplify( + &mut IsleContext { ctx: self }, + orig_value, + &mut optimized_values, + ); + + self.stats.rewrite_rule_results += optimized_values.len() as u64; + + // It's not supposed to matter what order `simplify` returns values in. 
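// Editor's aside: a minimal sketch (not the vendored code) of the two throttles applied in
// this function: recursion into newly created values is cut off at a small depth, and only
// the first few candidate rewrites are kept. `simplify` is a stand-in closure here, not the
// generated ISLE constructor.
const SKETCH_REWRITE_LIMIT: usize = 5;
const SKETCH_MATCHES_LIMIT: usize = 5;

fn sketch_bounded_rewrite(depth: &mut usize, value: u32, simplify: &dyn Fn(u32) -> Vec<u32>) -> Vec<u32> {
    if *depth > SKETCH_REWRITE_LIMIT {
        // Too deep: keep only the original form rather than recursing further.
        return vec![value];
    }
    *depth += 1;
    let mut candidates = simplify(value);
    // Cap the fan-out so one node cannot flood its eclass with alternatives.
    candidates.truncate(SKETCH_MATCHES_LIMIT);
    // The original value always remains a member of the eclass.
    candidates.push(value);
    *depth -= 1;
    candidates
}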
+ self.ctrl_plane.shuffle(&mut optimized_values); + + let num_matches = optimized_values.len(); + if num_matches > MATCHES_LIMIT { + trace!( + "Reached maximum matches limit; too many optimized values \ + ({num_matches} > {MATCHES_LIMIT}); ignoring rest.", + ); + optimized_values.truncate(MATCHES_LIMIT); + } + + trace!(" -> returned from ISLE: {orig_value} -> {optimized_values:?}"); + + // Construct a union-node tree representing the new eclass + // that results from rewriting. If any returned value was + // marked "subsume", take only that value. Otherwise, + // sequentially build the chain over the original value and + // all returned values. + let result_value = if let Some(&subsuming_value) = optimized_values + .iter() + .find(|&value| self.subsume_values.contains(value)) + { + optimized_values.clear(); + self.stats.pure_inst_subsume += 1; + subsuming_value + } else { + let mut union_value = orig_value; + for optimized_value in optimized_values.drain(..) { + trace!( + "Returned from ISLE for {}, got {:?}", + orig_value, + optimized_value + ); + if optimized_value == orig_value { + trace!(" -> same as orig value; skipping"); + self.stats.pure_inst_rewrite_to_self += 1; + continue; + } + let old_union_value = union_value; + union_value = self.func.dfg.union(old_union_value, optimized_value); + self.stats.union += 1; + trace!(" -> union: now {}", union_value); + self.func.dfg.merge_facts(old_union_value, optimized_value); + self.available_block[union_value] = + self.merge_availability(old_union_value, optimized_value); + } + union_value + }; + + self.rewrite_depth -= 1; + trace!("Decrementing rewrite depth; now {}", self.rewrite_depth); + if self.rewrite_depth == 0 { + self.subsume_values.clear(); + } + + debug_assert!(self.optimized_values.is_empty()); + self.optimized_values = optimized_values; + + result_value + } + + fn merge_availability(&self, a: Value, b: Value) -> Block { + let a = self.available_block[a]; + let b = self.available_block[b]; + if self.domtree.dominates(a, b) { + a + } else { + b + } + } + + /// Optimize a "skeleton" instruction, possibly removing + /// it. Returns `true` if the instruction should be removed from + /// the layout. + fn optimize_skeleton_inst(&mut self, inst: Inst, block: Block) -> bool { + self.stats.skeleton_inst += 1; + + // First, can we try to deduplicate? We need to keep some copy + // of the instruction around because it's side-effecting, but + // we may be able to reuse an earlier instance of it. + if is_mergeable_for_egraph(self.func, inst) { + let result = self.func.dfg.inst_results(inst).get(0).copied(); + trace!(" -> mergeable side-effecting op {}", inst); + + // Does this instruction already exist? If so, add entries to + // the value-map to rewrite uses of its results to the results + // of the original (existing) instruction. If not, optimize + // the new instruction. + // + // Note that the GVN map is scoped, which is important + // here: because effectful ops are not removed from the + // skeleton (`Layout`), we need to be mindful of whether + // our current position is dominated by an instance of the + // instruction. (See #5796 for details.) + let ty = self.func.dfg.ctrl_typevar(inst); + match self + .gvn_map + .entry(&NullCtx, (ty, self.func.dfg.insts[inst])) + { + ScopedEntry::Occupied(o) => { + let orig_result = *o.get(); + match (result, orig_result) { + (Some(result), Some(orig_result)) => { + // Hit in GVN map -- reuse value. 
+ self.stats.skeleton_inst_gvn += 1; + self.value_to_opt_value[result] = orig_result; + self.available_block[result] = self.available_block[orig_result]; + trace!(" -> merges result {} to {}", result, orig_result); + } + (None, None) => { + // Hit in the GVN map, but the instruction doesn't + // produce results, only side effects. Nothing else + // to do here. + self.stats.skeleton_inst_gvn += 1; + trace!(" -> merges with dominating instruction"); + } + (_, _) => unreachable!(), + } + true + } + ScopedEntry::Vacant(v) => { + // Otherwise, insert it into the value-map. + if let Some(result) = result { + self.value_to_opt_value[result] = result; + self.available_block[result] = block; + } + v.insert(result); + trace!(" -> inserts as new (no GVN)"); + false + } + } + } + // Otherwise, if a load or store, process it with the alias + // analysis to see if we can optimize it (rewrite in terms of + // an earlier load or stored value). + else if let Some(new_result) = + self.alias_analysis + .process_inst(self.func, self.alias_analysis_state, inst) + { + self.stats.alias_analysis_removed += 1; + let result = self.func.dfg.first_result(inst); + trace!( + " -> inst {} has result {} replaced with {}", + inst, + result, + new_result + ); + self.value_to_opt_value[result] = new_result; + self.available_block[result] = self.available_block[new_result]; + self.func.dfg.merge_facts(result, new_result); + true + } + // Otherwise, generic side-effecting op -- always keep it, and + // set its results to identity-map to original values. + else { + // Set all results to identity-map to themselves + // in the value-to-opt-value map. + for &result in self.func.dfg.inst_results(inst) { + self.value_to_opt_value[result] = result; + self.available_block[result] = block; + } + false + } + } + + /// Helper to propagate facts on constant values: if PCC is + /// enabled, then unconditionally add a fact attesting to the + /// Value's concrete value. + fn attach_constant_fact(&mut self, inst: Inst, value: Value, ty: Type) { + if self.flags.enable_pcc() { + if let InstructionData::UnaryImm { + opcode: Opcode::Iconst, + imm, + } = self.func.dfg.insts[inst] + { + let imm: i64 = imm.into(); + self.func.dfg.facts[value] = + Some(Fact::constant(ty.bits().try_into().unwrap(), imm as u64)); + } + } + } +} + +impl<'a> EgraphPass<'a> { + /// Create a new EgraphPass. + pub fn new( + func: &'a mut Function, + raw_domtree: &'a DominatorTree, + loop_analysis: &'a LoopAnalysis, + alias_analysis: &'a mut AliasAnalysis<'a>, + flags: &'a Flags, + ctrl_plane: &'a mut ControlPlane, + ) -> Self { + let mut domtree = DominatorTreePreorder::new(); + domtree.compute(raw_domtree); + Self { + func, + domtree, + loop_analysis, + alias_analysis, + flags, + ctrl_plane, + stats: Stats::default(), + remat_values: FxHashSet::default(), + } + } + + /// Run the process. + pub fn run(&mut self) { + self.remove_pure_and_optimize(); + + trace!("egraph built:\n{}\n", self.func.display()); + if cfg!(feature = "trace-log") { + for (value, def) in self.func.dfg.values_and_defs() { + trace!(" -> {} = {:?}", value, def); + match def { + ValueDef::Result(i, 0) => { + trace!(" -> {} = {:?}", i, self.func.dfg.insts[i]); + } + _ => {} + } + } + } + + self.elaborate(); + + log::trace!("stats: {:#?}", self.stats); + } + + /// Remove pure nodes from the `Layout` of the function, ensuring + /// that only the "side-effect skeleton" remains, and also + /// optimize the pure nodes. 
This is the first step of + /// egraph-based processing and turns the pure CFG-based CLIF into + /// a CFG skeleton with a sea of (optimized) nodes tying it + /// together. + /// + /// As we walk through the code, we eagerly apply optimization + /// rules; at any given point we have a "latest version" of an + /// eclass of possible representations for a `Value` in the + /// original program, which is itself a `Value` at the root of a + /// union-tree. We keep a map from the original values to these + /// optimized values. When we encounter any instruction (pure or + /// side-effecting skeleton) we rewrite its arguments to capture + /// the "latest" optimized forms of these values. (We need to do + /// this as part of this pass, and not later using a finished map, + /// because the eclass can continue to be updated and we need to + /// only refer to its subset that exists at this stage, to + /// maintain acyclicity.) + fn remove_pure_and_optimize(&mut self) { + let mut cursor = FuncCursor::new(self.func); + let mut value_to_opt_value: SecondaryMap = + SecondaryMap::with_default(Value::reserved_value()); + + // Map from instruction to value for hash-consing of pure ops + // into the egraph. This can be a standard (non-scoped) + // hashmap because pure ops have no location: they are + // "outside of" control flow. + // + // Note also that we keep the controlling typevar (the `Type` + // in the tuple below) because it may disambiguate + // instructions that are identical except for type. + // + // We store both skeleton and non-skeleton instructions in the + // GVN map; for skeleton instructions, we only store those + // that are idempotent, i.e., still eligible to GVN. Note that + // some skeleton instructions are idempotent but do not + // produce a value: e.g., traps on a given condition. To allow + // for both cases, we store an `Option` as the value in + // this map. + let mut gvn_map: ScopedHashMap<(Type, InstructionData), Option> = + ScopedHashMap::with_capacity(cursor.func.dfg.num_values()); + + // The block in the domtree preorder traversal at each level + // of the GVN map. + let mut gvn_map_blocks: Vec = vec![]; + + // To get the best possible merging and canonicalization, we + // track where a value is "available" at: this is the + // domtree-nearest-ancestor join of all args if the value + // itself is pure, otherwise the block where the value is + // defined. (And for union nodes, the + // domtree-highest-ancestor, i.e., the meet or the dual to the + // above join.) + let mut available_block: SecondaryMap = + SecondaryMap::with_default(Block::reserved_value()); + + // This is an initial guess at the size we'll need, but we add + // more values as we build simplified alternative expressions so + // this is likely to realloc again later. + available_block.resize(cursor.func.dfg.num_values()); + + // In domtree preorder, visit blocks. (TODO: factor out an + // iterator from this and elaborator.) + let root = cursor.layout().entry_block().unwrap(); + enum StackEntry { + Visit(Block), + Pop, + } + let mut block_stack = vec![StackEntry::Visit(root)]; + while let Some(entry) = block_stack.pop() { + match entry { + StackEntry::Visit(block) => { + // We popped this block; push children + // immediately, then process this block. 
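// Editor's aside: an isolated sketch of the Visit/Pop stack discipline used just below,
// over a hypothetical `children: &[Vec<usize>]` dominator tree. Each `Visit` opens a scope
// and the matching `Pop` closes it, which is how the scoped GVN map stays aligned with the
// domtree during the traversal. Illustrative only.
fn sketch_preorder(
    children: &[Vec<usize>],
    root: usize,
    mut enter: impl FnMut(usize),
    mut leave: impl FnMut(),
) {
    enum Entry {
        Visit(usize),
        Pop,
    }
    let mut stack = vec![Entry::Visit(root)];
    while let Some(entry) = stack.pop() {
        match entry {
            Entry::Visit(node) => {
                // Schedule this node's scope close first, then its children, so the Pop is
                // only reached after the whole subtree has been processed.
                stack.push(Entry::Pop);
                stack.extend(children[node].iter().map(|&c| Entry::Visit(c)));
                enter(node);
            }
            Entry::Pop => leave(),
        }
    }
}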
+ block_stack.push(StackEntry::Pop); + block_stack.extend( + self.ctrl_plane + .shuffled(self.domtree.children(block)) + .map(StackEntry::Visit), + ); + gvn_map.increment_depth(); + gvn_map_blocks.push(block); + + trace!("Processing block {}", block); + cursor.set_position(CursorPosition::Before(block)); + + let mut alias_analysis_state = self.alias_analysis.block_starting_state(block); + + for ¶m in cursor.func.dfg.block_params(block) { + trace!("creating initial singleton eclass for blockparam {}", param); + value_to_opt_value[param] = param; + available_block[param] = block; + } + while let Some(inst) = cursor.next_inst() { + trace!("Processing inst {}", inst); + + // Rewrite args of *all* instructions using the + // value-to-opt-value map. + cursor.func.dfg.map_inst_values(inst, |arg| { + let new_value = value_to_opt_value[arg]; + trace!("rewriting arg {} of inst {} to {}", arg, inst, new_value); + debug_assert_ne!(new_value, Value::reserved_value()); + new_value + }); + + // Build a context for optimization, with borrows of + // state. We can't invoke a method on `self` because + // we've borrowed `self.func` mutably (as + // `cursor.func`) so we pull apart the pieces instead + // here. + let mut ctx = OptimizeCtx { + func: cursor.func, + value_to_opt_value: &mut value_to_opt_value, + gvn_map: &mut gvn_map, + gvn_map_blocks: &mut gvn_map_blocks, + available_block: &mut available_block, + rewrite_depth: 0, + subsume_values: FxHashSet::default(), + remat_values: &mut self.remat_values, + stats: &mut self.stats, + domtree: &self.domtree, + alias_analysis: self.alias_analysis, + alias_analysis_state: &mut alias_analysis_state, + flags: self.flags, + ctrl_plane: self.ctrl_plane, + optimized_values: Default::default(), + }; + + if is_pure_for_egraph(ctx.func, inst) { + // Insert into GVN map and optimize any new nodes + // inserted (recursively performing this work for + // any nodes the optimization rules produce). + let inst = NewOrExistingInst::Existing(inst); + ctx.insert_pure_enode(inst); + // We've now rewritten all uses, or will when we + // see them, and the instruction exists as a pure + // enode in the eclass, so we can remove it. + cursor.remove_inst_and_step_back(); + } else { + if ctx.optimize_skeleton_inst(inst, block) { + cursor.remove_inst_and_step_back(); + } + } + } + } + StackEntry::Pop => { + gvn_map.decrement_depth(); + gvn_map_blocks.pop(); + } + } + } + } + + /// Scoped elaboration: compute a final ordering of op computation + /// for each block and update the given Func body. After this + /// runs, the function body is back into the state where every + /// Inst with an used result is placed in the layout (possibly + /// duplicated, if our code-motion logic decides this is the best + /// option). + /// + /// This works in concert with the domtree. We do a preorder + /// traversal of the domtree, tracking a scoped map from Id to + /// (new) Value. The map's scopes correspond to levels in the + /// domtree. + /// + /// At each block, we iterate forward over the side-effecting + /// eclasses, and recursively generate their arg eclasses, then + /// emit the ops themselves. + /// + /// To use an eclass in a given block, we first look it up in the + /// scoped map, and get the Value if already present. If not, we + /// need to generate it. We emit the extracted enode for this + /// eclass after recursively generating its args. 
Eclasses are + /// thus computed "as late as possible", but then memoized into + /// the Id-to-Value map and available to all dominated blocks and + /// for the rest of this block. (This subsumes GVN.) + fn elaborate(&mut self) { + let mut elaborator = Elaborator::new( + self.func, + &self.domtree, + self.loop_analysis, + &self.remat_values, + &mut self.stats, + self.ctrl_plane, + ); + elaborator.elaborate(); + + self.check_post_egraph(); + } + + #[cfg(debug_assertions)] + fn check_post_egraph(&self) { + // Verify that no union nodes are reachable from inst args, + // and that all inst args' defining instructions are in the + // layout. + for block in self.func.layout.blocks() { + for inst in self.func.layout.block_insts(block) { + self.func + .dfg + .inst_values(inst) + .for_each(|arg| match self.func.dfg.value_def(arg) { + ValueDef::Result(i, _) => { + debug_assert!(self.func.layout.inst_block(i).is_some()); + } + ValueDef::Union(..) => { + panic!("egraph union node {arg} still reachable at {inst}!"); + } + _ => {} + }) + } + } + } + + #[cfg(not(debug_assertions))] + fn check_post_egraph(&self) {} +} + +/// Implementation of external-context equality and hashing on +/// InstructionData. This allows us to deduplicate instructions given +/// some context that lets us see its value lists, so we don't need to +/// store arguments inline in the `InstuctionData` (or alongside it in +/// some newly-defined key type) in all cases. +struct GVNContext<'a> { + value_lists: &'a ValueListPool, +} + +impl<'a> CtxEq<(Type, InstructionData), (Type, InstructionData)> for GVNContext<'a> { + fn ctx_eq( + &self, + (a_ty, a_inst): &(Type, InstructionData), + (b_ty, b_inst): &(Type, InstructionData), + ) -> bool { + a_ty == b_ty && a_inst.eq(b_inst, self.value_lists) + } +} + +impl<'a> CtxHash<(Type, InstructionData)> for GVNContext<'a> { + fn ctx_hash(&self, state: &mut H, (ty, inst): &(Type, InstructionData)) { + std::hash::Hash::hash(&ty, state); + inst.hash(state, self.value_lists); + } +} + +/// Statistics collected during egraph-based processing. +#[derive(Clone, Debug, Default)] +pub(crate) struct Stats { + pub(crate) pure_inst: u64, + pub(crate) pure_inst_deduped: u64, + pub(crate) pure_inst_subsume: u64, + pub(crate) pure_inst_rewrite_to_self: u64, + pub(crate) pure_inst_insert_orig: u64, + pub(crate) pure_inst_insert_new: u64, + pub(crate) skeleton_inst: u64, + pub(crate) skeleton_inst_gvn: u64, + pub(crate) alias_analysis_removed: u64, + pub(crate) new_inst: u64, + pub(crate) union: u64, + pub(crate) subsume: u64, + pub(crate) remat: u64, + pub(crate) rewrite_rule_invoked: u64, + pub(crate) rewrite_rule_results: u64, + pub(crate) rewrite_depth_limit: u64, + pub(crate) elaborate_visit_node: u64, + pub(crate) elaborate_memoize_hit: u64, + pub(crate) elaborate_memoize_miss: u64, + pub(crate) elaborate_remat: u64, + pub(crate) elaborate_licm_hoist: u64, + pub(crate) elaborate_func: u64, + pub(crate) elaborate_func_pre_insts: u64, + pub(crate) elaborate_func_post_insts: u64, + pub(crate) elaborate_best_cost_fixpoint_iters: u64, +} diff --git a/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/egraph/cost.rs b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/egraph/cost.rs new file mode 100644 index 000000000..a5cda3a92 --- /dev/null +++ b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/egraph/cost.rs @@ -0,0 +1,213 @@ +//! Cost functions for egraph representation. + +use crate::ir::Opcode; + +/// A cost of computing some value in the program. 
+/// +/// Costs are measured in an arbitrary union that we represent in a +/// `u32`. The ordering is meant to be meaningful, but the value of a +/// single unit is arbitrary (and "not to scale"). We use a collection +/// of heuristics to try to make this approximation at least usable. +/// +/// We start by defining costs for each opcode (see `pure_op_cost` +/// below). The cost of computing some value, initially, is the cost +/// of its opcode, plus the cost of computing its inputs. +/// +/// We then adjust the cost according to loop nests: for each +/// loop-nest level, we multiply by 1024. Because we only have 32 +/// bits, we limit this scaling to a loop-level of two (i.e., multiply +/// by 2^20 ~= 1M). +/// +/// Arithmetic on costs is always saturating: we don't want to wrap +/// around and return to a tiny cost when adding the costs of two very +/// expensive operations. It is better to approximate and lose some +/// precision than to lose the ordering by wrapping. +/// +/// Finally, we reserve the highest value, `u32::MAX`, as a sentinel +/// that means "infinite". This is separate from the finite costs and +/// not reachable by doing arithmetic on them (even when overflowing) +/// -- we saturate just *below* infinity. (This is done by the +/// `finite()` method.) An infinite cost is used to represent a value +/// that cannot be computed, or otherwise serve as a sentinel when +/// performing search for the lowest-cost representation of a value. +#[derive(Clone, Copy, PartialEq, Eq)] +pub(crate) struct Cost(u32); + +impl core::fmt::Debug for Cost { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + if *self == Cost::infinity() { + write!(f, "Cost::Infinite") + } else { + f.debug_struct("Cost::Finite") + .field("op_cost", &self.op_cost()) + .field("depth", &self.depth()) + .finish() + } + } +} + +impl Ord for Cost { + #[inline] + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + // We make sure that the high bits are the op cost and the low bits are + // the depth. This means that we can use normal integer comparison to + // order by op cost and then depth. + // + // We want to break op cost ties with depth (rather than the other way + // around). When the op cost is the same, we prefer shallow and wide + // expressions to narrow and deep expressions and breaking ties with + // `depth` gives us that. For example, `(a + b) + (c + d)` is preferred + // to `((a + b) + c) + d`. This is beneficial because it exposes more + // instruction-level parallelism and shortens live ranges. + self.0.cmp(&other.0) + } +} + +impl PartialOrd for Cost { + #[inline] + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Cost { + const DEPTH_BITS: u8 = 8; + const DEPTH_MASK: u32 = (1 << Self::DEPTH_BITS) - 1; + const OP_COST_MASK: u32 = !Self::DEPTH_MASK; + const MAX_OP_COST: u32 = Self::OP_COST_MASK >> Self::DEPTH_BITS; + + pub(crate) fn infinity() -> Cost { + // 2^32 - 1 is, uh, pretty close to infinite... (we use `Cost` + // only for heuristics and always saturate so this suffices!) + Cost(u32::MAX) + } + + pub(crate) fn zero() -> Cost { + Cost(0) + } + + /// Construct a new `Cost` from the given parts. + /// + /// If the opcode cost is greater than or equal to the maximum representable + /// opcode cost, then the resulting `Cost` saturates to infinity. 
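// Editor's aside: a small worked example (editorial, not vendored) of the packing scheme
// documented above: the operation cost lives in the upper 24 bits and the depth in the low
// 8 bits, so a plain u32 comparison orders by op cost first and breaks ties on depth.
fn sketch_pack(op_cost: u32, depth: u8) -> u32 {
    (op_cost << 8) | u32::from(depth)
}

fn sketch_ordering() {
    let cheap = sketch_pack(1, 7);   // e.g. a constant: lowest op cost wins regardless of depth
    let shallow = sketch_pack(3, 1); // e.g. a simple add at depth 1
    let deep = sketch_pack(3, 4);    // same op cost as `shallow`, but a deeper expression
    assert!(cheap < shallow && shallow < deep);
}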
+ fn new(opcode_cost: u32, depth: u8) -> Cost { + if opcode_cost >= Self::MAX_OP_COST { + Self::infinity() + } else { + Cost(opcode_cost << Self::DEPTH_BITS | u32::from(depth)) + } + } + + fn depth(&self) -> u8 { + let depth = self.0 & Self::DEPTH_MASK; + u8::try_from(depth).unwrap() + } + + fn op_cost(&self) -> u32 { + (self.0 & Self::OP_COST_MASK) >> Self::DEPTH_BITS + } + + /// Compute the cost of the operation and its given operands. + /// + /// Caller is responsible for checking that the opcode came from an instruction + /// that satisfies `inst_predicates::is_pure_for_egraph()`. + pub(crate) fn of_pure_op(op: Opcode, operand_costs: impl IntoIterator) -> Self { + let c = pure_op_cost(op) + operand_costs.into_iter().sum(); + Cost::new(c.op_cost(), c.depth().saturating_add(1)) + } +} + +impl std::iter::Sum for Cost { + fn sum>(iter: I) -> Self { + iter.fold(Self::zero(), |a, b| a + b) + } +} + +impl std::default::Default for Cost { + fn default() -> Cost { + Cost::zero() + } +} + +impl std::ops::Add for Cost { + type Output = Cost; + + fn add(self, other: Cost) -> Cost { + let op_cost = self.op_cost().saturating_add(other.op_cost()); + let depth = std::cmp::max(self.depth(), other.depth()); + Cost::new(op_cost, depth) + } +} + +/// Return the cost of a *pure* opcode. +/// +/// Caller is responsible for checking that the opcode came from an instruction +/// that satisfies `inst_predicates::is_pure_for_egraph()`. +fn pure_op_cost(op: Opcode) -> Cost { + match op { + // Constants. + Opcode::Iconst | Opcode::F32const | Opcode::F64const => Cost::new(1, 0), + + // Extends/reduces. + Opcode::Uextend | Opcode::Sextend | Opcode::Ireduce | Opcode::Iconcat | Opcode::Isplit => { + Cost::new(1, 0) + } + + // "Simple" arithmetic. + Opcode::Iadd + | Opcode::Isub + | Opcode::Band + | Opcode::Bor + | Opcode::Bxor + | Opcode::Bnot + | Opcode::Ishl + | Opcode::Ushr + | Opcode::Sshr => Cost::new(3, 0), + + // Everything else (pure.) + _ => Cost::new(4, 0), + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn add_cost() { + let a = Cost::new(5, 2); + let b = Cost::new(37, 3); + assert_eq!(a + b, Cost::new(42, 3)); + assert_eq!(b + a, Cost::new(42, 3)); + } + + #[test] + fn add_infinity() { + let a = Cost::new(5, 2); + let b = Cost::infinity(); + assert_eq!(a + b, Cost::infinity()); + assert_eq!(b + a, Cost::infinity()); + } + + #[test] + fn op_cost_saturates_to_infinity() { + let a = Cost::new(Cost::MAX_OP_COST - 10, 2); + let b = Cost::new(11, 2); + assert_eq!(a + b, Cost::infinity()); + assert_eq!(b + a, Cost::infinity()); + } + + #[test] + fn depth_saturates_to_max_depth() { + let a = Cost::new(10, u8::MAX); + let b = Cost::new(10, 1); + assert_eq!( + Cost::of_pure_op(Opcode::Iconst, [a, b]), + Cost::new(21, u8::MAX) + ); + assert_eq!( + Cost::of_pure_op(Opcode::Iconst, [b, a]), + Cost::new(21, u8::MAX) + ); + } +} diff --git a/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/egraph/elaborate.rs b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/egraph/elaborate.rs new file mode 100644 index 000000000..203eed6ff --- /dev/null +++ b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/egraph/elaborate.rs @@ -0,0 +1,841 @@ +//! Elaboration phase: lowers EGraph back to sequences of operations +//! in CFG nodes. 
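// Editor's aside: a worked example (editorial sketch, not vendored code) of how the cost
// scheme in the `cost` module above composes. `(op_cost, depth)` tuples stand in for
// `Cost`: addition saturates the op costs and keeps the larger depth, and wrapping an
// expression in another pure op bumps the depth by one. The constants 1 (constant) and 3
// (simple arithmetic) mirror `pure_op_cost`.
fn sketch_cost_of_iadd_of_two_consts() -> (u32, u8) {
    let of_pure_op = |op_cost: u32, operands: &[(u32, u8)]| -> (u32, u8) {
        let (c, d) = operands.iter().fold((op_cost, 0u8), |(c, d), &(oc, od)| {
            (c.saturating_add(oc), d.max(od))
        });
        (c, d.saturating_add(1))
    };
    let konst = of_pure_op(1, &[]);           // iconst             -> (1, 1)
    let add = of_pure_op(3, &[konst, konst]); // iadd of two consts -> (5, 2)
    add
}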
+ +use super::cost::Cost; +use super::Stats; +use crate::ctxhash::NullCtx; +use crate::dominator_tree::DominatorTreePreorder; +use crate::hash_map::Entry as HashEntry; +use crate::inst_predicates::is_pure_for_egraph; +use crate::ir::{Block, Function, Inst, Value, ValueDef}; +use crate::loop_analysis::{Loop, LoopAnalysis}; +use crate::scoped_hash_map::ScopedHashMap; +use crate::trace; +use alloc::vec::Vec; +use cranelift_control::ControlPlane; +use cranelift_entity::{packed_option::ReservedValue, SecondaryMap}; +use rustc_hash::{FxHashMap, FxHashSet}; +use smallvec::{smallvec, SmallVec}; + +pub(crate) struct Elaborator<'a> { + func: &'a mut Function, + domtree: &'a DominatorTreePreorder, + loop_analysis: &'a LoopAnalysis, + /// Map from Value that is produced by a pure Inst (and was thus + /// not in the side-effecting skeleton) to the value produced by + /// an elaborated inst (placed in the layout) to whose results we + /// refer in the final code. + /// + /// The first time we use some result of an instruction during + /// elaboration, we can place it and insert an identity map (inst + /// results to that same inst's results) in this scoped + /// map. Within that block and its dom-tree children, that mapping + /// is visible and we can continue to use it. This allows us to + /// avoid cloning the instruction. However, if we pop that scope + /// and use it somewhere else as well, we will need to + /// duplicate. We detect this case by checking, when a value that + /// we want is not present in this map, whether the producing inst + /// is already placed in the Layout. If so, we duplicate, and + /// insert non-identity mappings from the original inst's results + /// to the cloned inst's results. + /// + /// Note that as values may refer to unions that represent a subset + /// of a larger eclass, it's not valid to walk towards the root of a + /// union tree: doing so would potentially equate values that fall + /// on different branches of the dominator tree. + value_to_elaborated_value: ScopedHashMap, + /// Map from Value to the best (lowest-cost) Value in its eclass + /// (tree of union value-nodes). + value_to_best_value: SecondaryMap, + /// Stack of blocks and loops in current elaboration path. + loop_stack: SmallVec<[LoopStackEntry; 8]>, + /// The current block into which we are elaborating. + cur_block: Block, + /// Values that opt rules have indicated should be rematerialized + /// in every block they are used (e.g., immediates or other + /// "cheap-to-compute" ops). + remat_values: &'a FxHashSet, + /// Explicitly-unrolled value elaboration stack. + elab_stack: Vec, + /// Results from the elab stack. + elab_result_stack: Vec, + /// Explicitly-unrolled block elaboration stack. + block_stack: Vec, + /// Copies of values that have been rematerialized. + remat_copies: FxHashMap<(Block, Value), Value>, + /// Stats for various events during egraph processing, to help + /// with optimization of this infrastructure. + stats: &'a mut Stats, + /// Chaos-mode control-plane so we can test that we still get + /// correct results when our heuristics make bad decisions. + ctrl_plane: &'a mut ControlPlane, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +struct BestEntry(Cost, Value); + +impl PartialOrd for BestEntry { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for BestEntry { + #[inline] + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.0.cmp(&other.0).then_with(|| { + // Note that this comparison is reversed. 
When costs are equal, + // prefer the value with the bigger index. This is a heuristic that + // prefers results of rewrites to the original value, since we + // expect that our rewrites are generally improvements. + self.1.cmp(&other.1).reverse() + }) + } +} + +#[derive(Clone, Copy, Debug)] +struct ElaboratedValue { + in_block: Block, + value: Value, +} + +#[derive(Clone, Debug)] +struct LoopStackEntry { + /// The loop identifier. + lp: Loop, + /// The hoist point: a block that immediately dominates this + /// loop. May not be an immediate predecessor, but will be a valid + /// point to place all loop-invariant ops: they must depend only + /// on inputs that dominate the loop, so are available at (the end + /// of) this block. + hoist_block: Block, + /// The depth in the scope map. + scope_depth: u32, +} + +#[derive(Clone, Debug)] +enum ElabStackEntry { + /// Next action is to resolve this value into an elaborated inst + /// (placed into the layout) that produces the value, and + /// recursively elaborate the insts that produce its args. + /// + /// Any inserted ops should be inserted before `before`, which is + /// the instruction demanding this value. + Start { value: Value, before: Inst }, + /// Args have been pushed; waiting for results. + PendingInst { + inst: Inst, + result_idx: usize, + num_args: usize, + before: Inst, + }, +} + +#[derive(Clone, Debug)] +enum BlockStackEntry { + Elaborate { block: Block, idom: Option }, + Pop, +} + +impl<'a> Elaborator<'a> { + pub(crate) fn new( + func: &'a mut Function, + domtree: &'a DominatorTreePreorder, + loop_analysis: &'a LoopAnalysis, + remat_values: &'a FxHashSet, + stats: &'a mut Stats, + ctrl_plane: &'a mut ControlPlane, + ) -> Self { + let num_values = func.dfg.num_values(); + let mut value_to_best_value = + SecondaryMap::with_default(BestEntry(Cost::infinity(), Value::reserved_value())); + value_to_best_value.resize(num_values); + Self { + func, + domtree, + loop_analysis, + value_to_elaborated_value: ScopedHashMap::with_capacity(num_values), + value_to_best_value, + loop_stack: smallvec![], + cur_block: Block::reserved_value(), + remat_values, + elab_stack: vec![], + elab_result_stack: vec![], + block_stack: vec![], + remat_copies: FxHashMap::default(), + stats, + ctrl_plane, + } + } + + fn start_block(&mut self, idom: Option, block: Block) { + trace!( + "start_block: block {:?} with idom {:?} at loop depth {:?} scope depth {}", + block, + idom, + self.loop_stack.len(), + self.value_to_elaborated_value.depth() + ); + + // Pop any loop levels we're no longer in. + while let Some(inner_loop) = self.loop_stack.last() { + if self.loop_analysis.is_in_loop(block, inner_loop.lp) { + break; + } + self.loop_stack.pop(); + } + + // Note that if the *entry* block is a loop header, we will + // not make note of the loop here because it will not have an + // immediate dominator. We must disallow this case because we + // will skip adding the `LoopStackEntry` here but our + // `LoopAnalysis` will otherwise still make note of this loop + // and loop depths will not match. + if let Some(idom) = idom { + if let Some(lp) = self.loop_analysis.is_loop_header(block) { + self.loop_stack.push(LoopStackEntry { + lp, + // Any code hoisted out of this loop will have code + // placed in `idom`, and will have def mappings + // inserted in to the scoped hashmap at that block's + // level. 
+ hoist_block: idom, + scope_depth: (self.value_to_elaborated_value.depth() - 1) as u32, + }); + trace!( + " -> loop header, pushing; depth now {}", + self.loop_stack.len() + ); + } + } else { + debug_assert!( + self.loop_analysis.is_loop_header(block).is_none(), + "Entry block (domtree root) cannot be a loop header!" + ); + } + + trace!("block {}: loop stack is {:?}", block, self.loop_stack); + + self.cur_block = block; + } + + fn compute_best_values(&mut self) { + let best = &mut self.value_to_best_value; + + // We can't make random decisions inside the fixpoint loop below because + // that could cause values to change on every iteration of the loop, + // which would make the loop never terminate. So in chaos testing + // mode we need a form of making suboptimal decisions that is fully + // deterministic. We choose to simply make the worst decision we know + // how to do instead of the best. + let use_worst = self.ctrl_plane.get_decision(); + + // Do a fixpoint loop to compute the best value for each eclass. + // + // The maximum number of iterations is the length of the longest chain + // of `vNN -> vMM` edges in the dataflow graph where `NN < MM`, so this + // is *technically* quadratic, but `cranelift-frontend` won't construct + // any such edges. NaN canonicalization will introduce some of these + // edges, but they are chains of only two or three edges. So in + // practice, we *never* do more than a handful of iterations here unless + // (a) we parsed the CLIF from text and the text was funkily numbered, + // which we don't really care about, or (b) the CLIF producer did + // something weird, in which case it is their responsibility to stop + // doing that. + trace!( + "Entering fixpoint loop to compute the {} values for each eclass", + if use_worst { + "worst (chaos mode)" + } else { + "best" + } + ); + let mut keep_going = true; + while keep_going { + keep_going = false; + trace!( + "fixpoint iteration {}", + self.stats.elaborate_best_cost_fixpoint_iters + ); + self.stats.elaborate_best_cost_fixpoint_iters += 1; + + for (value, def) in self.func.dfg.values_and_defs() { + trace!("computing best for value {:?} def {:?}", value, def); + let orig_best_value = best[value]; + + match def { + ValueDef::Union(x, y) => { + // Pick the best of the two options based on + // min-cost. This works because each element of `best` + // is a `(cost, value)` tuple; `cost` comes first so + // the natural comparison works based on cost, and + // breaks ties based on value number. + best[value] = if use_worst { + if best[x].1.is_reserved_value() { + best[y] + } else if best[y].1.is_reserved_value() { + best[x] + } else { + std::cmp::max(best[x], best[y]) + } + } else { + std::cmp::min(best[x], best[y]) + }; + trace!( + " -> best of union({:?}, {:?}) = {:?}", + best[x], + best[y], + best[value] + ); + } + ValueDef::Param(_, _) => { + best[value] = BestEntry(Cost::zero(), value); + } + // If the Inst is inserted into the layout (which is, + // at this point, only the side-effecting skeleton), + // then it must be computed and thus we give it zero + // cost. + ValueDef::Result(inst, _) => { + if let Some(_) = self.func.layout.inst_block(inst) { + best[value] = BestEntry(Cost::zero(), value); + } else { + let inst_data = &self.func.dfg.insts[inst]; + // N.B.: at this point we know that the opcode is + // pure, so `pure_op_cost`'s precondition is + // satisfied. 
+ let cost = Cost::of_pure_op( + inst_data.opcode(), + self.func.dfg.inst_values(inst).map(|value| best[value].0), + ); + best[value] = BestEntry(cost, value); + trace!(" -> cost of value {} = {:?}", value, cost); + } + } + }; + + // Keep on iterating the fixpoint loop while we are finding new + // best values. + keep_going |= orig_best_value != best[value]; + } + } + + if cfg!(any(feature = "trace-log", debug_assertions)) { + trace!("finished fixpoint loop to compute best value for each eclass"); + for value in self.func.dfg.values() { + trace!("-> best for eclass {:?}: {:?}", value, best[value]); + debug_assert_ne!(best[value].1, Value::reserved_value()); + // You might additionally be expecting an assert that the best + // cost is not infinity, however infinite cost *can* happen in + // practice. First, note that our cost function doesn't know + // about any shared structure in the dataflow graph, it only + // sums operand costs. (And trying to avoid that by deduping a + // single operation's operands is a losing game because you can + // always just add one indirection and go from `add(x, x)` to + // `add(foo(x), bar(x))` to hide the shared structure.) Given + // that blindness to sharing, we can make cost grow + // exponentially with a linear sequence of operations: + // + // v0 = iconst.i32 1 ;; cost = 1 + // v1 = iadd v0, v0 ;; cost = 3 + 1 + 1 + // v2 = iadd v1, v1 ;; cost = 3 + 5 + 5 + // v3 = iadd v2, v2 ;; cost = 3 + 13 + 13 + // v4 = iadd v3, v3 ;; cost = 3 + 29 + 29 + // v5 = iadd v4, v4 ;; cost = 3 + 61 + 61 + // v6 = iadd v5, v5 ;; cost = 3 + 125 + 125 + // ;; etc... + // + // Such a chain can cause cost to saturate to infinity. How do + // we choose which e-node is best when there are multiple that + // have saturated to infinity? It doesn't matter. As long as + // invariant (2) for optimization rules is upheld by our rule + // set (see `cranelift/codegen/src/opts/README.md`) it is safe + // to choose *any* e-node in the e-class. At worst we will + // produce suboptimal code, but never an incorrectness. + } + } + } + + /// Elaborate use of an eclass, inserting any needed new + /// instructions before the given inst `before`. Should only be + /// given values corresponding to results of instructions or + /// blockparams. + fn elaborate_eclass_use(&mut self, value: Value, before: Inst) -> ElaboratedValue { + debug_assert_ne!(value, Value::reserved_value()); + + // Kick off the process by requesting this result + // value. + self.elab_stack + .push(ElabStackEntry::Start { value, before }); + + // Now run the explicit-stack recursion until we reach + // the root. + self.process_elab_stack(); + debug_assert_eq!(self.elab_result_stack.len(), 1); + self.elab_result_stack.pop().unwrap() + } + + /// Possibly rematerialize the instruction producing the value in + /// `arg` and rewrite `arg` to refer to it, if needed. Returns + /// `true` if a rewrite occurred. + fn maybe_remat_arg( + remat_values: &FxHashSet, + func: &mut Function, + remat_copies: &mut FxHashMap<(Block, Value), Value>, + insert_block: Block, + before: Inst, + arg: &mut ElaboratedValue, + stats: &mut Stats, + ) -> bool { + // TODO (#7313): we may want to consider recursive + // rematerialization as well. We could process the arguments of + // the rematerialized instruction up to a certain depth. This + // would affect, e.g., adds-with-one-constant-arg, which are + // currently rematerialized. Right now we don't do this, to + // avoid the need for another fixpoint loop here. 
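// A minimal, self-contained sketch (not from the crate itself) of the
// cost-based "best value" ordering used by `compute_best_values` above:
// entries compare by cost first, and ties prefer the *larger* value index
// (the reversed comparison), so rewritten values win over the originals.
// The integer pair `Best(cost, value)` is a stand-in for the real Cost/Value types.
use std::cmp::Ordering;

#[derive(Clone, Copy, Debug, PartialEq, Eq)]
struct Best(u32 /* cost */, u32 /* value index */);

impl Ord for Best {
    fn cmp(&self, other: &Self) -> Ordering {
        // Lower cost wins; on equal cost, the higher-numbered value wins.
        self.0.cmp(&other.0).then_with(|| self.1.cmp(&other.1).reverse())
    }
}

impl PartialOrd for Best {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

fn main() {
    // Equal cost: `min` picks the higher-numbered (rewritten) value.
    assert_eq!(std::cmp::min(Best(3, 10), Best(3, 42)), Best(3, 42));
    // Different cost: the cheaper value wins regardless of its index.
    assert_eq!(std::cmp::min(Best(2, 10), Best(3, 42)), Best(2, 10));
}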
+ if arg.in_block != insert_block && remat_values.contains(&arg.value) { + let new_value = match remat_copies.entry((insert_block, arg.value)) { + HashEntry::Occupied(o) => *o.get(), + HashEntry::Vacant(v) => { + let inst = func.dfg.value_def(arg.value).inst().unwrap(); + debug_assert_eq!(func.dfg.inst_results(inst).len(), 1); + let new_inst = func.dfg.clone_inst(inst); + func.layout.insert_inst(new_inst, before); + let new_result = func.dfg.inst_results(new_inst)[0]; + *v.insert(new_result) + } + }; + trace!("rematerialized {} as {}", arg.value, new_value); + arg.value = new_value; + stats.elaborate_remat += 1; + true + } else { + false + } + } + + fn process_elab_stack(&mut self) { + while let Some(entry) = self.elab_stack.pop() { + match entry { + ElabStackEntry::Start { value, before } => { + debug_assert!(self.func.dfg.value_is_real(value)); + + self.stats.elaborate_visit_node += 1; + + // Get the best option; we use `value` (latest + // value) here so we have a full view of the + // eclass. + trace!("looking up best value for {}", value); + let BestEntry(_, best_value) = self.value_to_best_value[value]; + trace!("elaborate: value {} -> best {}", value, best_value); + debug_assert_ne!(best_value, Value::reserved_value()); + + if let Some(elab_val) = + self.value_to_elaborated_value.get(&NullCtx, &best_value) + { + // Value is available; use it. + trace!("elaborate: value {} -> {:?}", value, elab_val); + self.stats.elaborate_memoize_hit += 1; + self.elab_result_stack.push(*elab_val); + continue; + } + + self.stats.elaborate_memoize_miss += 1; + + // Now resolve the value to its definition to see + // how we can compute it. + let (inst, result_idx) = match self.func.dfg.value_def(best_value) { + ValueDef::Result(inst, result_idx) => { + trace!( + " -> value {} is result {} of {}", + best_value, + result_idx, + inst + ); + (inst, result_idx) + } + ValueDef::Param(in_block, _) => { + // We don't need to do anything to compute + // this value; just push its result on the + // result stack (blockparams are already + // available). + trace!(" -> value {} is a blockparam", best_value); + self.elab_result_stack.push(ElaboratedValue { + in_block, + value: best_value, + }); + continue; + } + ValueDef::Union(_, _) => { + panic!("Should never have a Union value as the best value"); + } + }; + + trace!( + " -> result {} of inst {:?}", + result_idx, + self.func.dfg.insts[inst] + ); + + // We're going to need to use this instruction + // result, placing the instruction into the + // layout. First, enqueue all args to be + // elaborated. Push state to receive the results + // and later elab this inst. + let num_args = self.func.dfg.inst_values(inst).count(); + self.elab_stack.push(ElabStackEntry::PendingInst { + inst, + result_idx, + num_args, + before, + }); + + // Push args in reverse order so we process the + // first arg first. + for arg in self.func.dfg.inst_values(inst).rev() { + debug_assert_ne!(arg, Value::reserved_value()); + self.elab_stack + .push(ElabStackEntry::Start { value: arg, before }); + } + } + + ElabStackEntry::PendingInst { + inst, + result_idx, + num_args, + before, + } => { + trace!( + "PendingInst: {} result {} args {} before {}", + inst, + result_idx, + num_args, + before + ); + + // We should have all args resolved at this + // point. Grab them and drain them out, removing + // them. + let arg_idx = self.elab_result_stack.len() - num_args; + let arg_values = &mut self.elab_result_stack[arg_idx..]; + + // Compute max loop depth. 
+ // + // Note that if there are no arguments then this instruction + // is allowed to get hoisted up one loop. This is not + // usually used since no-argument values are things like + // constants which are typically rematerialized, but for the + // `vconst` instruction 128-bit constants aren't as easily + // rematerialized. They're hoisted out of inner loops but + // not to the function entry which may run the risk of + // placing too much register pressure on the entire + // function. This is modeled with the `.saturating_sub(1)` + // as the default if there's otherwise no maximum. + let loop_hoist_level = arg_values + .iter() + .map(|&value| { + // Find the outermost loop level at which + // the value's defining block *is not* a + // member. This is the loop-nest level + // whose hoist-block we hoist to. + let hoist_level = self + .loop_stack + .iter() + .position(|loop_entry| { + !self.loop_analysis.is_in_loop(value.in_block, loop_entry.lp) + }) + .unwrap_or(self.loop_stack.len()); + trace!( + " -> arg: elab_value {:?} hoist level {:?}", + value, + hoist_level + ); + hoist_level + }) + .max() + .unwrap_or(self.loop_stack.len().saturating_sub(1)); + trace!( + " -> loop hoist level: {:?}; cur loop depth: {:?}, loop_stack: {:?}", + loop_hoist_level, + self.loop_stack.len(), + self.loop_stack, + ); + + // We know that this is a pure inst, because + // non-pure roots have already been placed in the + // value-to-elab'd-value map, so they will not + // reach this stage of processing. + // + // We now must determine the location at which we + // place the instruction. This is the current + // block *unless* we hoist above a loop when all + // args are loop-invariant (and this op is pure). + let (scope_depth, before, insert_block) = + if loop_hoist_level == self.loop_stack.len() { + // Depends on some value at the current + // loop depth, or remat forces it here: + // place it at the current location. + ( + self.value_to_elaborated_value.depth(), + before, + self.func.layout.inst_block(before).unwrap(), + ) + } else { + // Does not depend on any args at current + // loop depth: hoist out of loop. + self.stats.elaborate_licm_hoist += 1; + let data = &self.loop_stack[loop_hoist_level]; + // `data.hoist_block` should dominate `before`'s block. + let before_block = self.func.layout.inst_block(before).unwrap(); + debug_assert!(self.domtree.dominates(data.hoist_block, before_block)); + // Determine the instruction at which we + // insert in `data.hoist_block`. + let before = self.func.layout.last_inst(data.hoist_block).unwrap(); + (data.scope_depth as usize, before, data.hoist_block) + }; + + trace!( + " -> decided to place: before {} insert_block {}", + before, + insert_block + ); + + // Now that we have the location for the + // instruction, check if any of its args are remat + // values. If so, and if we don't have a copy of + // the rematerializing instruction for this block + // yet, create one. + let mut remat_arg = false; + for arg_value in arg_values.iter_mut() { + if Self::maybe_remat_arg( + &self.remat_values, + &mut self.func, + &mut self.remat_copies, + insert_block, + before, + arg_value, + &mut self.stats, + ) { + remat_arg = true; + } + } + + // Now we need to place `inst` at the computed + // location (just before `before`). Note that + // `inst` may already have been placed somewhere + // else, because a pure node may be elaborated at + // more than one place. 
In this case, we need to + // duplicate the instruction (and return the + // `Value`s for that duplicated instance instead). + // + // Also clone if we rematerialized, because we + // don't want to rewrite the args in the original + // copy. + trace!("need inst {} before {}", inst, before); + let inst = if self.func.layout.inst_block(inst).is_some() || remat_arg { + // Clone the inst! + let new_inst = self.func.dfg.clone_inst(inst); + trace!( + " -> inst {} already has a location; cloned to {}", + inst, + new_inst + ); + // Create mappings in the + // value-to-elab'd-value map from original + // results to cloned results. + for (&result, &new_result) in self + .func + .dfg + .inst_results(inst) + .iter() + .zip(self.func.dfg.inst_results(new_inst).iter()) + { + let elab_value = ElaboratedValue { + value: new_result, + in_block: insert_block, + }; + let best_result = self.value_to_best_value[result]; + self.value_to_elaborated_value.insert_if_absent_with_depth( + &NullCtx, + best_result.1, + elab_value, + scope_depth, + ); + + self.value_to_best_value[new_result] = best_result; + + trace!( + " -> cloned inst has new result {} for orig {}", + new_result, + result + ); + } + new_inst + } else { + trace!(" -> no location; using original inst"); + // Create identity mappings from result values + // to themselves in this scope, since we're + // using the original inst. + for &result in self.func.dfg.inst_results(inst) { + let elab_value = ElaboratedValue { + value: result, + in_block: insert_block, + }; + let best_result = self.value_to_best_value[result]; + self.value_to_elaborated_value.insert_if_absent_with_depth( + &NullCtx, + best_result.1, + elab_value, + scope_depth, + ); + trace!(" -> inserting identity mapping for {}", result); + } + inst + }; + + // Place the inst just before `before`. + assert!( + is_pure_for_egraph(self.func, inst), + "something has gone very wrong if we are elaborating effectful \ + instructions, they should have remained in the skeleton" + ); + self.func.layout.insert_inst(inst, before); + + // Update the inst's arguments. + self.func + .dfg + .overwrite_inst_values(inst, arg_values.into_iter().map(|ev| ev.value)); + + // Now that we've consumed the arg values, pop + // them off the stack. + self.elab_result_stack.truncate(arg_idx); + + // Push the requested result index of the + // instruction onto the elab-results stack. + self.elab_result_stack.push(ElaboratedValue { + in_block: insert_block, + value: self.func.dfg.inst_results(inst)[result_idx], + }); + } + } + } + } + + fn elaborate_block(&mut self, elab_values: &mut Vec, idom: Option, block: Block) { + trace!("elaborate_block: block {}", block); + self.start_block(idom, block); + + // Iterate over the side-effecting skeleton using the linked + // list in Layout. We will insert instructions that are + // elaborated *before* `inst`, so we can always use its + // next-link to continue the iteration. + let mut next_inst = self.func.layout.first_inst(block); + let mut first_branch = None; + while let Some(inst) = next_inst { + trace!( + "elaborating inst {} with results {:?}", + inst, + self.func.dfg.inst_results(inst) + ); + // Record the first branch we see in the block; all + // elaboration for args of *any* branch must be inserted + // before the *first* branch, because the branch group + // must remain contiguous at the end of the block. + if self.func.dfg.insts[inst].opcode().is_branch() && first_branch == None { + first_branch = Some(inst); + } + + // Determine where elaboration inserts insts. 
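// A standalone sketch (simplified types, not the crate's own) of the per-block
// rematerialization cache that `maybe_remat_arg` above relies on: the first use
// of a remat value in a block "clones" its defining instruction and records the
// copy in a `(block, value) -> copy` map, so later uses in the same block reuse
// that copy while other blocks get their own.
use std::collections::hash_map::{Entry, HashMap};

type Block = u32;
type Value = u32;

struct RematCache {
    copies: HashMap<(Block, Value), Value>,
    next_value: Value, // stand-in for allocating a cloned instruction's result
}

impl RematCache {
    fn remat_in_block(&mut self, block: Block, value: Value) -> Value {
        match self.copies.entry((block, value)) {
            // A copy already exists in this block: reuse it.
            Entry::Occupied(o) => *o.get(),
            // First use in this block: "clone" the defining inst and remember it.
            Entry::Vacant(v) => {
                let copy = self.next_value;
                self.next_value += 1;
                *v.insert(copy)
            }
        }
    }
}

fn main() {
    let mut cache = RematCache { copies: HashMap::new(), next_value: 100 };
    let a = cache.remat_in_block(1, 7);
    let b = cache.remat_in_block(1, 7); // same block: memoized copy
    let c = cache.remat_in_block(2, 7); // different block: fresh copy
    assert_eq!(a, b);
    assert_ne!(a, c);
}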
+ let before = first_branch.unwrap_or(inst); + trace!(" -> inserting before {}", before); + + elab_values.extend(self.func.dfg.inst_values(inst)); + for arg in elab_values.iter_mut() { + trace!(" -> arg {}", *arg); + // Elaborate the arg, placing any newly-inserted insts + // before `before`. Get the updated value, which may + // be different than the original. + let mut new_arg = self.elaborate_eclass_use(*arg, before); + Self::maybe_remat_arg( + &self.remat_values, + &mut self.func, + &mut self.remat_copies, + block, + inst, + &mut new_arg, + &mut self.stats, + ); + trace!(" -> rewrote arg to {:?}", new_arg); + *arg = new_arg.value; + } + self.func + .dfg + .overwrite_inst_values(inst, elab_values.drain(..)); + + // We need to put the results of this instruction in the + // map now. + for &result in self.func.dfg.inst_results(inst) { + trace!(" -> result {}", result); + let best_result = self.value_to_best_value[result]; + self.value_to_elaborated_value.insert_if_absent( + &NullCtx, + best_result.1, + ElaboratedValue { + in_block: block, + value: result, + }, + ); + } + + next_inst = self.func.layout.next_inst(inst); + } + } + + fn elaborate_domtree(&mut self, domtree: &DominatorTreePreorder) { + self.block_stack.push(BlockStackEntry::Elaborate { + block: self.func.layout.entry_block().unwrap(), + idom: None, + }); + + // A temporary workspace for elaborate_block, allocated here to maximize the use of the + // allocation. + let mut elab_values = Vec::new(); + + while let Some(top) = self.block_stack.pop() { + match top { + BlockStackEntry::Elaborate { block, idom } => { + self.block_stack.push(BlockStackEntry::Pop); + self.value_to_elaborated_value.increment_depth(); + + self.elaborate_block(&mut elab_values, idom, block); + + // Push children. We are doing a preorder + // traversal so we do this after processing this + // block above. + let block_stack_end = self.block_stack.len(); + for child in self.ctrl_plane.shuffled(domtree.children(block)) { + self.block_stack.push(BlockStackEntry::Elaborate { + block: child, + idom: Some(block), + }); + } + // Reverse what we just pushed so we elaborate in + // original block order. (The domtree iter is a + // single-ended iter over a singly-linked list so + // we can't `.rev()` above.) + self.block_stack[block_stack_end..].reverse(); + } + BlockStackEntry::Pop => { + self.value_to_elaborated_value.decrement_depth(); + } + } + } + } + + pub(crate) fn elaborate(&mut self) { + self.stats.elaborate_func += 1; + self.stats.elaborate_func_pre_insts += self.func.dfg.num_insts() as u64; + self.compute_best_values(); + self.elaborate_domtree(&self.domtree); + self.stats.elaborate_func_post_insts += self.func.dfg.num_insts() as u64; + } +} diff --git a/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/flowgraph.rs b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/flowgraph.rs new file mode 100644 index 000000000..b57af3105 --- /dev/null +++ b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/flowgraph.rs @@ -0,0 +1,349 @@ +//! A control flow graph represented as mappings of basic blocks to their predecessors +//! and successors. +//! +//! Successors are represented as basic blocks while predecessors are represented by basic +//! blocks. Basic blocks are denoted by tuples of block and branch/jump instructions. Each +//! predecessor tuple corresponds to the end of a basic block. +//! +//! ```c +//! Block0: +//! ... ; beginning of basic block +//! +//! ... +//! +//! brif vx, Block1, Block2 ; end of basic block +//! +//! 
Block1: +//! jump block3 +//! ``` +//! +//! Here `Block1` and `Block2` would each have a single predecessor denoted as `(Block0, brif)`, +//! while `Block3` would have a single predecessor denoted as `(Block1, jump block3)`. + +use crate::bforest; +use crate::entity::SecondaryMap; +use crate::inst_predicates; +use crate::ir::{Block, Function, Inst}; +use crate::timing; +use core::mem; + +/// A basic block denoted by its enclosing Block and last instruction. +#[derive(Debug, PartialEq, Eq)] +pub struct BlockPredecessor { + /// Enclosing Block key. + pub block: Block, + /// Last instruction in the basic block. + pub inst: Inst, +} + +impl BlockPredecessor { + /// Convenient method to construct new BlockPredecessor. + pub fn new(block: Block, inst: Inst) -> Self { + Self { block, inst } + } +} + +/// A container for the successors and predecessors of some Block. +#[derive(Clone, Default)] +struct CFGNode { + /// Instructions that can branch or jump to this block. + /// + /// This maps branch instruction -> predecessor block which is redundant since the block containing + /// the branch instruction is available from the `layout.inst_block()` method. We store the + /// redundant information because: + /// + /// 1. Many `pred_iter()` consumers want the block anyway, so it is handily available. + /// 2. The `invalidate_block_successors()` may be called *after* branches have been removed from + /// their block, but we still need to remove them form the old block predecessor map. + /// + /// The redundant block stored here is always consistent with the CFG successor lists, even after + /// the IR has been edited. + pub predecessors: bforest::Map, + + /// Set of blocks that are the targets of branches and jumps in this block. + /// The set is ordered by block number, indicated by the `()` comparator type. + pub successors: bforest::Set, +} + +/// The Control Flow Graph maintains a mapping of blocks to their predecessors +/// and successors where predecessors are basic blocks and successors are +/// basic blocks. +pub struct ControlFlowGraph { + data: SecondaryMap, + pred_forest: bforest::MapForest, + succ_forest: bforest::SetForest, + valid: bool, +} + +impl ControlFlowGraph { + /// Allocate a new blank control flow graph. + pub fn new() -> Self { + Self { + data: SecondaryMap::new(), + valid: false, + pred_forest: bforest::MapForest::new(), + succ_forest: bforest::SetForest::new(), + } + } + + /// Clear all data structures in this control flow graph. + pub fn clear(&mut self) { + self.data.clear(); + self.pred_forest.clear(); + self.succ_forest.clear(); + self.valid = false; + } + + /// Allocate and compute the control flow graph for `func`. + pub fn with_function(func: &Function) -> Self { + let mut cfg = Self::new(); + cfg.compute(func); + cfg + } + + /// Compute the control flow graph of `func`. + /// + /// This will clear and overwrite any information already stored in this data structure. + pub fn compute(&mut self, func: &Function) { + let _tt = timing::flowgraph(); + self.clear(); + self.data.resize(func.dfg.num_blocks()); + + for block in &func.layout { + self.compute_block(func, block); + } + + self.valid = true; + } + + fn compute_block(&mut self, func: &Function, block: Block) { + inst_predicates::visit_block_succs(func, block, |inst, dest, _| { + self.add_edge(block, inst, dest); + }); + } + + fn invalidate_block_successors(&mut self, block: Block) { + // Temporarily take ownership because we need mutable access to self.data inside the loop. 
+ // Unfortunately borrowck cannot see that our mut accesses to predecessors don't alias + // our iteration over successors. + let mut successors = mem::replace(&mut self.data[block].successors, Default::default()); + for succ in successors.iter(&self.succ_forest) { + self.data[succ] + .predecessors + .retain(&mut self.pred_forest, |_, &mut e| e != block); + } + successors.clear(&mut self.succ_forest); + } + + /// Recompute the control flow graph of `block`. + /// + /// This is for use after modifying instructions within a specific block. It recomputes all edges + /// from `block` while leaving edges to `block` intact. Its functionality a subset of that of the + /// more expensive `compute`, and should be used when we know we don't need to recompute the CFG + /// from scratch, but rather that our changes have been restricted to specific blocks. + pub fn recompute_block(&mut self, func: &Function, block: Block) { + debug_assert!(self.is_valid()); + self.invalidate_block_successors(block); + self.compute_block(func, block); + } + + fn add_edge(&mut self, from: Block, from_inst: Inst, to: Block) { + self.data[from] + .successors + .insert(to, &mut self.succ_forest, &()); + self.data[to] + .predecessors + .insert(from_inst, from, &mut self.pred_forest, &()); + } + + /// Get an iterator over the CFG predecessors to `block`. + pub fn pred_iter(&self, block: Block) -> PredIter { + PredIter(self.data[block].predecessors.iter(&self.pred_forest)) + } + + /// Get an iterator over the CFG successors to `block`. + pub fn succ_iter(&self, block: Block) -> SuccIter { + debug_assert!(self.is_valid()); + self.data[block].successors.iter(&self.succ_forest) + } + + /// Check if the CFG is in a valid state. + /// + /// Note that this doesn't perform any kind of validity checks. It simply checks if the + /// `compute()` method has been called since the last `clear()`. It does not check that the + /// CFG is consistent with the function. + pub fn is_valid(&self) -> bool { + self.valid + } +} + +/// An iterator over block predecessors. The iterator type is `BlockPredecessor`. +/// +/// Each predecessor is an instruction that branches to the block. +pub struct PredIter<'a>(bforest::MapIter<'a, Inst, Block>); + +impl<'a> Iterator for PredIter<'a> { + type Item = BlockPredecessor; + + fn next(&mut self) -> Option { + self.0.next().map(|(i, e)| BlockPredecessor::new(e, i)) + } +} + +/// An iterator over block successors. The iterator type is `Block`. 
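// A minimal sketch (std maps instead of the bforest containers above) of the
// predecessor/successor bookkeeping this module maintains: recomputing a block
// first removes it from the predecessor sets of its old successors, then
// re-adds its new outgoing edges, leaving edges *into* the block untouched.
// Blocks are plain integers here, and predecessors are keyed only by block
// rather than by (branch instruction, block).
use std::collections::{BTreeSet, HashMap};

type Block = u32;

#[derive(Default)]
struct Cfg {
    successors: HashMap<Block, BTreeSet<Block>>,
    predecessors: HashMap<Block, BTreeSet<Block>>,
}

impl Cfg {
    fn add_edge(&mut self, from: Block, to: Block) {
        self.successors.entry(from).or_default().insert(to);
        self.predecessors.entry(to).or_default().insert(from);
    }

    fn recompute_block(&mut self, block: Block, new_succs: &[Block]) {
        // Invalidate: drop `block` from the predecessor sets of its old successors.
        if let Some(old) = self.successors.remove(&block) {
            for succ in old {
                if let Some(preds) = self.predecessors.get_mut(&succ) {
                    preds.remove(&block);
                }
            }
        }
        // Recompute: add the new outgoing edges.
        for &to in new_succs {
            self.add_edge(block, to);
        }
    }
}

fn has_pred(cfg: &Cfg, block: Block, pred: Block) -> bool {
    cfg.predecessors.get(&block).map_or(false, |s| s.contains(&pred))
}

fn main() {
    let mut cfg = Cfg::default();
    cfg.add_edge(0, 1);
    cfg.add_edge(0, 2);
    cfg.recompute_block(0, &[1, 3]); // block 0 now branches to blocks 1 and 3
    assert!(has_pred(&cfg, 1, 0));
    assert!(has_pred(&cfg, 3, 0));
    assert!(!has_pred(&cfg, 2, 0)); // the edge to block 2 was invalidated
}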
+pub type SuccIter<'a> = bforest::SetIter<'a, Block>; + +#[cfg(test)] +mod tests { + use super::*; + use crate::cursor::{Cursor, FuncCursor}; + use crate::ir::{types, InstBuilder}; + use alloc::vec::Vec; + + #[test] + fn empty() { + let func = Function::new(); + ControlFlowGraph::with_function(&func); + } + + #[test] + fn no_predecessors() { + let mut func = Function::new(); + let block0 = func.dfg.make_block(); + let block1 = func.dfg.make_block(); + let block2 = func.dfg.make_block(); + func.layout.append_block(block0); + func.layout.append_block(block1); + func.layout.append_block(block2); + + let cfg = ControlFlowGraph::with_function(&func); + + let mut fun_blocks = func.layout.blocks(); + for block in func.layout.blocks() { + assert_eq!(block, fun_blocks.next().unwrap()); + assert_eq!(cfg.pred_iter(block).count(), 0); + assert_eq!(cfg.succ_iter(block).count(), 0); + } + } + + #[test] + fn branches_and_jumps() { + let mut func = Function::new(); + let block0 = func.dfg.make_block(); + let cond = func.dfg.append_block_param(block0, types::I32); + let block1 = func.dfg.make_block(); + let block2 = func.dfg.make_block(); + + let br_block0_block2_block1; + let br_block1_block1_block2; + + { + let mut cur = FuncCursor::new(&mut func); + + cur.insert_block(block0); + br_block0_block2_block1 = cur.ins().brif(cond, block2, &[], block1, &[]); + + cur.insert_block(block1); + br_block1_block1_block2 = cur.ins().brif(cond, block1, &[], block2, &[]); + + cur.insert_block(block2); + } + + let mut cfg = ControlFlowGraph::with_function(&func); + + { + let block0_predecessors = cfg.pred_iter(block0).collect::>(); + let block1_predecessors = cfg.pred_iter(block1).collect::>(); + let block2_predecessors = cfg.pred_iter(block2).collect::>(); + + let block0_successors = cfg.succ_iter(block0).collect::>(); + let block1_successors = cfg.succ_iter(block1).collect::>(); + let block2_successors = cfg.succ_iter(block2).collect::>(); + + assert_eq!(block0_predecessors.len(), 0); + assert_eq!(block1_predecessors.len(), 2); + assert_eq!(block2_predecessors.len(), 2); + + assert_eq!( + block1_predecessors + .contains(&BlockPredecessor::new(block0, br_block0_block2_block1)), + true + ); + assert_eq!( + block1_predecessors + .contains(&BlockPredecessor::new(block1, br_block1_block1_block2)), + true + ); + assert_eq!( + block2_predecessors + .contains(&BlockPredecessor::new(block0, br_block0_block2_block1)), + true + ); + assert_eq!( + block2_predecessors + .contains(&BlockPredecessor::new(block1, br_block1_block1_block2)), + true + ); + + assert_eq!(block0_successors, [block1, block2]); + assert_eq!(block1_successors, [block1, block2]); + assert_eq!(block2_successors, []); + } + + // Add a new block to hold a return instruction + let ret_block = func.dfg.make_block(); + + { + let mut cur = FuncCursor::new(&mut func); + cur.insert_block(ret_block); + cur.ins().return_(&[]); + } + + // Change some instructions and recompute block0 and ret_block + func.dfg + .replace(br_block0_block2_block1) + .brif(cond, block1, &[], ret_block, &[]); + cfg.recompute_block(&func, block0); + cfg.recompute_block(&func, ret_block); + let br_block0_block1_ret_block = br_block0_block2_block1; + + { + let block0_predecessors = cfg.pred_iter(block0).collect::>(); + let block1_predecessors = cfg.pred_iter(block1).collect::>(); + let block2_predecessors = cfg.pred_iter(block2).collect::>(); + + let block0_successors = cfg.succ_iter(block0); + let block1_successors = cfg.succ_iter(block1); + let block2_successors = cfg.succ_iter(block2); + + 
assert_eq!(block0_predecessors.len(), 0); + assert_eq!(block1_predecessors.len(), 2); + assert_eq!(block2_predecessors.len(), 1); + + assert_eq!( + block1_predecessors + .contains(&BlockPredecessor::new(block0, br_block0_block1_ret_block)), + true + ); + assert_eq!( + block1_predecessors + .contains(&BlockPredecessor::new(block1, br_block1_block1_block2)), + true + ); + assert_eq!( + block2_predecessors + .contains(&BlockPredecessor::new(block0, br_block0_block1_ret_block)), + false + ); + assert_eq!( + block2_predecessors + .contains(&BlockPredecessor::new(block1, br_block1_block1_block2)), + true + ); + + assert_eq!(block0_successors.collect::>(), [block1, ret_block]); + assert_eq!(block1_successors.collect::>(), [block1, block2]); + assert_eq!(block2_successors.collect::>(), []); + } + } +} diff --git a/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/incremental_cache.rs b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/incremental_cache.rs new file mode 100644 index 000000000..89e42cf24 --- /dev/null +++ b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/incremental_cache.rs @@ -0,0 +1,256 @@ +//! This module provides a set of primitives that allow implementing an incremental cache on top of +//! Cranelift, making it possible to reuse previous compiled artifacts for functions that have been +//! compiled previously. +//! +//! This set of operation is experimental and can be enabled using the Cargo feature +//! `incremental-cache`. +//! +//! This can bring speedups in different cases: change-code-and-immediately-recompile iterations +//! get faster, modules sharing lots of code can reuse each other's artifacts, etc. +//! +//! The three main primitives are the following: +//! - `compute_cache_key` is used to compute the cache key associated to a `Function`. This is +//! basically the content of the function, modulo a few things the caching system is resilient to. +//! - `serialize_compiled` is used to serialize the result of a compilation, so it can be reused +//! later on by... +//! - `try_finish_recompile`, which reads binary blobs serialized with `serialize_compiled`, +//! re-creating the compilation artifact from those. +//! +//! The `CacheStore` trait and `Context::compile_with_cache` method are provided as +//! high-level, easy-to-use facilities to make use of that cache, and show an example of how to use +//! the above three primitives to form a full incremental caching system. + +use core::fmt; + +use crate::alloc::string::String; +use crate::alloc::vec::Vec; +use crate::ir::function::{FunctionStencil, VersionMarker}; +use crate::ir::Function; +use crate::machinst::{CompiledCode, CompiledCodeStencil}; +use crate::result::CompileResult; +use crate::{isa::TargetIsa, timing}; +use crate::{trace, CompileError, Context}; +use alloc::borrow::{Cow, ToOwned as _}; +use alloc::string::ToString as _; +use cranelift_control::ControlPlane; + +impl Context { + /// Compile the function, as in `compile`, but tries to reuse compiled artifacts from former + /// compilations using the provided cache store. 
+ pub fn compile_with_cache( + &mut self, + isa: &dyn TargetIsa, + cache_store: &mut dyn CacheKvStore, + ctrl_plane: &mut ControlPlane, + ) -> CompileResult<(&CompiledCode, bool)> { + let cache_key_hash = { + let _tt = timing::try_incremental_cache(); + + let cache_key_hash = compute_cache_key(isa, &self.func); + + if let Some(blob) = cache_store.get(&cache_key_hash.0) { + match try_finish_recompile(&self.func, &blob) { + Ok(compiled_code) => { + let info = compiled_code.code_info(); + + if isa.flags().enable_incremental_compilation_cache_checks() { + let actual_result = self.compile(isa, ctrl_plane)?; + assert_eq!(*actual_result, compiled_code); + assert_eq!(actual_result.code_info(), info); + // no need to set `compiled_code` here, it's set by `compile()`. + return Ok((actual_result, true)); + } + + let compiled_code = self.compiled_code.insert(compiled_code); + return Ok((compiled_code, true)); + } + Err(err) => { + trace!("error when finishing recompilation: {err}"); + } + } + } + + cache_key_hash + }; + + let stencil = self + .compile_stencil(isa, ctrl_plane) + .map_err(|err| CompileError { + inner: err, + func: &self.func, + })?; + + let stencil = { + let _tt = timing::store_incremental_cache(); + let (stencil, res) = serialize_compiled(stencil); + if let Ok(blob) = res { + cache_store.insert(&cache_key_hash.0, blob); + } + stencil + }; + + let compiled_code = self + .compiled_code + .insert(stencil.apply_params(&self.func.params)); + + Ok((compiled_code, false)) + } +} + +/// Backing storage for an incremental compilation cache, when enabled. +pub trait CacheKvStore { + /// Given a cache key hash, retrieves the associated opaque serialized data. + fn get(&self, key: &[u8]) -> Option>; + + /// Given a new cache key and a serialized blob obtained from `serialize_compiled`, stores it + /// in the cache store. + fn insert(&mut self, key: &[u8], val: Vec); +} + +/// Hashed `CachedKey`, to use as an identifier when looking up whether a function has already been +/// compiled or not. +#[derive(Clone, Hash, PartialEq, Eq)] +pub struct CacheKeyHash([u8; 32]); + +impl std::fmt::Display for CacheKeyHash { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, "CacheKeyHash:{:?}", self.0) + } +} + +#[derive(serde_derive::Serialize, serde_derive::Deserialize)] +struct CachedFunc { + // Note: The version marker must be first to ensure deserialization stops in case of a version + // mismatch before attempting to deserialize the actual compiled code. + version_marker: VersionMarker, + stencil: CompiledCodeStencil, +} + +/// Key for caching a single function's compilation. +/// +/// If two functions get the same `CacheKey`, then we can reuse the compiled artifacts, modulo some +/// fixups. +/// +/// Note: the key will be invalidated across different versions of cranelift, as the +/// `FunctionStencil` contains a `VersionMarker` itself. 
+#[derive(Hash)] +struct CacheKey<'a> { + stencil: &'a FunctionStencil, + parameters: CompileParameters, +} + +#[derive(Clone, PartialEq, Hash, serde_derive::Serialize, serde_derive::Deserialize)] +struct CompileParameters { + isa: String, + triple: String, + flags: String, + isa_flags: Vec, +} + +impl CompileParameters { + fn from_isa(isa: &dyn TargetIsa) -> Self { + Self { + isa: isa.name().to_owned(), + triple: isa.triple().to_string(), + flags: isa.flags().to_string(), + isa_flags: isa + .isa_flags() + .into_iter() + .map(|v| v.value_string()) + .collect(), + } + } +} + +impl<'a> CacheKey<'a> { + /// Creates a new cache store key for a function. + /// + /// This is a bit expensive to compute, so it should be cached and reused as much as possible. + fn new(isa: &dyn TargetIsa, f: &'a Function) -> Self { + CacheKey { + stencil: &f.stencil, + parameters: CompileParameters::from_isa(isa), + } + } +} + +/// Compute a cache key, and hash it on your behalf. +/// +/// Since computing the `CacheKey` is a bit expensive, it should be done as least as possible. +pub fn compute_cache_key(isa: &dyn TargetIsa, func: &Function) -> CacheKeyHash { + use core::hash::{Hash as _, Hasher}; + use sha2::Digest as _; + + struct Sha256Hasher(sha2::Sha256); + + impl Hasher for Sha256Hasher { + fn finish(&self) -> u64 { + panic!("Sha256Hasher doesn't support finish!"); + } + fn write(&mut self, bytes: &[u8]) { + self.0.update(bytes); + } + } + + let cache_key = CacheKey::new(isa, func); + + let mut hasher = Sha256Hasher(sha2::Sha256::new()); + cache_key.hash(&mut hasher); + let hash: [u8; 32] = hasher.0.finalize().into(); + + CacheKeyHash(hash) +} + +/// Given a function that's been successfully compiled, serialize it to a blob that the caller may +/// store somewhere for future use by `try_finish_recompile`. +/// +/// As this function requires ownership on the `CompiledCodeStencil`, it gives it back at the end +/// of the function call. The value is left untouched. +pub fn serialize_compiled( + result: CompiledCodeStencil, +) -> (CompiledCodeStencil, Result, postcard::Error>) { + let cached = CachedFunc { + version_marker: VersionMarker, + stencil: result, + }; + let result = postcard::to_allocvec(&cached); + (cached.stencil, result) +} + +/// An error returned when recompiling failed. +#[derive(Debug)] +pub enum RecompileError { + /// The version embedded in the cache entry isn't the same as cranelift's current version. + VersionMismatch, + /// An error occurred while deserializing the cache entry. + Deserialize(postcard::Error), +} + +impl fmt::Display for RecompileError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + RecompileError::VersionMismatch => write!(f, "cranelift version mismatch",), + RecompileError::Deserialize(err) => { + write!(f, "postcard failed during deserialization: {err}") + } + } + } +} + +/// Given a function that's been precompiled and its entry in the caching storage, try to shortcut +/// compilation of the given function. +/// +/// Precondition: the bytes must have retrieved from a cache store entry which hash value +/// is strictly the same as the `Function`'s computed hash retrieved from `compute_cache_key`. 
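// The overall shape of the caching protocol described above, reduced to std
// types as a sketch: hash a key describing the compilation input, look it up
// in a key/value store, and either reuse the stored blob or compile, store,
// and return the fresh result. The real code hashes a FunctionStencil plus ISA
// flags with SHA-256 and serializes with postcard; this stand-in just uses
// `DefaultHasher` and a `HashMap` to show the control flow.
use std::collections::hash_map::DefaultHasher;
use std::collections::HashMap;
use std::hash::{Hash, Hasher};

fn cache_key<K: Hash>(key: &K) -> u64 {
    let mut h = DefaultHasher::new();
    key.hash(&mut h);
    h.finish()
}

/// Returns the artifact and whether it came from the cache.
fn get_or_compile(
    store: &mut HashMap<u64, Vec<u8>>,
    source: &str,
    compile: impl Fn(&str) -> Vec<u8>,
) -> (Vec<u8>, bool) {
    let key = cache_key(&source);
    if let Some(blob) = store.get(&key) {
        return (blob.clone(), true); // cache hit: reuse the previous artifact
    }
    let blob = compile(source); // cache miss: compile and remember the result
    store.insert(key, blob.clone());
    (blob, false)
}

fn main() {
    let mut store = HashMap::new();
    let compile = |src: &str| src.as_bytes().to_vec(); // stand-in "compiler"
    assert_eq!(get_or_compile(&mut store, "f(x) = x + 1", compile).1, false);
    assert_eq!(get_or_compile(&mut store, "f(x) = x + 1", compile).1, true);
}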
+pub fn try_finish_recompile(func: &Function, bytes: &[u8]) -> Result { + match postcard::from_bytes::(bytes) { + Ok(result) => { + if result.version_marker != func.stencil.version_marker { + Err(RecompileError::VersionMismatch) + } else { + Ok(result.stencil.apply_params(&func.params)) + } + } + Err(err) => Err(RecompileError::Deserialize(err)), + } +} diff --git a/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/inst_predicates.rs b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/inst_predicates.rs new file mode 100644 index 000000000..7a345264c --- /dev/null +++ b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/inst_predicates.rs @@ -0,0 +1,206 @@ +//! Instruction predicates/properties, shared by various analyses. +use crate::ir::immediates::Offset32; +use crate::ir::{self, Block, Function, Inst, InstructionData, Opcode, Type, Value}; + +/// Test whether the given opcode is unsafe to even consider as side-effect-free. +#[inline(always)] +fn trivially_has_side_effects(opcode: Opcode) -> bool { + opcode.is_call() + || opcode.is_branch() + || opcode.is_terminator() + || opcode.is_return() + || opcode.can_trap() + || opcode.other_side_effects() + || opcode.can_store() +} + +/// Load instructions without the `notrap` flag are defined to trap when +/// operating on inaccessible memory, so we can't treat them as side-effect-free even if the loaded +/// value is unused. +#[inline(always)] +fn is_load_with_defined_trapping(opcode: Opcode, data: &InstructionData) -> bool { + if !opcode.can_load() { + return false; + } + match *data { + InstructionData::StackLoad { .. } => false, + InstructionData::Load { flags, .. } => !flags.notrap(), + _ => true, + } +} + +/// Does the given instruction have any side-effect that would preclude it from being removed when +/// its value is unused? +#[inline(always)] +fn has_side_effect(func: &Function, inst: Inst) -> bool { + let data = &func.dfg.insts[inst]; + let opcode = data.opcode(); + trivially_has_side_effects(opcode) || is_load_with_defined_trapping(opcode, data) +} + +/// Does the given instruction behave as a "pure" node with respect to +/// aegraph semantics? +/// +/// - Trivially pure nodes (bitwise arithmetic, etc) +/// - Loads with the `readonly`, `notrap`, and `can_move` flags set +pub fn is_pure_for_egraph(func: &Function, inst: Inst) -> bool { + let is_pure_load = match func.dfg.insts[inst] { + InstructionData::Load { + opcode: Opcode::Load, + flags, + .. + } => flags.readonly() && flags.notrap() && flags.can_move(), + _ => false, + }; + + // Multi-value results do not play nicely with much of the egraph + // infrastructure. They are in practice used only for multi-return + // calls and some other odd instructions (e.g. uadd_overflow) which, + // for now, we can afford to leave in place as opaque + // side-effecting ops. So if more than one result, then the inst + // is "not pure". Similarly, ops with zero results can be used + // only for their side-effects, so are never pure. (Or if they + // are, we can always trivially eliminate them with no effect.) + let has_one_result = func.dfg.inst_results(inst).len() == 1; + + let op = func.dfg.insts[inst].opcode(); + + has_one_result && (is_pure_load || (!op.can_load() && !trivially_has_side_effects(op))) +} + +/// Can the given instruction be merged into another copy of itself? 
+/// These instructions may have side-effects, but as long as we retain +/// the first instance of the instruction, the second and further +/// instances are redundant if they would produce the same trap or +/// result. +pub fn is_mergeable_for_egraph(func: &Function, inst: Inst) -> bool { + let op = func.dfg.insts[inst].opcode(); + // We can only merge zero- and one-result operators due to the way that GVN + // is structured in the egraph implementation. + func.dfg.inst_results(inst).len() <= 1 + // Loads/stores are handled by alias analysis and not + // otherwise mergeable. + && !op.can_load() + && !op.can_store() + // Can only have idempotent side-effects. + && (!has_side_effect(func, inst) || op.side_effects_idempotent()) +} + +/// Does the given instruction have any side-effect as per [has_side_effect], or else is a load, +/// but not the get_pinned_reg opcode? +pub fn has_lowering_side_effect(func: &Function, inst: Inst) -> bool { + let op = func.dfg.insts[inst].opcode(); + op != Opcode::GetPinnedReg && (has_side_effect(func, inst) || op.can_load()) +} + +/// Is the given instruction a constant value (`iconst`, `fconst`) that can be +/// represented in 64 bits? +pub fn is_constant_64bit(func: &Function, inst: Inst) -> Option { + match &func.dfg.insts[inst] { + &InstructionData::UnaryImm { imm, .. } => Some(imm.bits() as u64), + &InstructionData::UnaryIeee16 { imm, .. } => Some(imm.bits() as u64), + &InstructionData::UnaryIeee32 { imm, .. } => Some(imm.bits() as u64), + &InstructionData::UnaryIeee64 { imm, .. } => Some(imm.bits()), + _ => None, + } +} + +/// Get the address, offset, and access type from the given instruction, if any. +pub fn inst_addr_offset_type(func: &Function, inst: Inst) -> Option<(Value, Offset32, Type)> { + match &func.dfg.insts[inst] { + InstructionData::Load { arg, offset, .. } => { + let ty = func.dfg.value_type(func.dfg.inst_results(inst)[0]); + Some((*arg, *offset, ty)) + } + InstructionData::LoadNoOffset { arg, .. } => { + let ty = func.dfg.value_type(func.dfg.inst_results(inst)[0]); + Some((*arg, 0.into(), ty)) + } + InstructionData::Store { args, offset, .. } => { + let ty = func.dfg.value_type(args[0]); + Some((args[1], *offset, ty)) + } + InstructionData::StoreNoOffset { args, .. } => { + let ty = func.dfg.value_type(args[0]); + Some((args[1], 0.into(), ty)) + } + _ => None, + } +} + +/// Get the store data, if any, from an instruction. +pub fn inst_store_data(func: &Function, inst: Inst) -> Option { + match &func.dfg.insts[inst] { + InstructionData::Store { args, .. } | InstructionData::StoreNoOffset { args, .. } => { + Some(args[0]) + } + _ => None, + } +} + +/// Determine whether this opcode behaves as a memory fence, i.e., +/// prohibits any moving of memory accesses across it. +pub fn has_memory_fence_semantics(op: Opcode) -> bool { + match op { + Opcode::AtomicRmw + | Opcode::AtomicCas + | Opcode::AtomicLoad + | Opcode::AtomicStore + | Opcode::Fence + | Opcode::Debugtrap => true, + Opcode::Call | Opcode::CallIndirect => true, + op if op.can_trap() => true, + _ => false, + } +} + +/// Visit all successors of a block with a given visitor closure. The closure +/// arguments are the branch instruction that is used to reach the successor, +/// the successor block itself, and a flag indicating whether the block is +/// branched to via a table entry. 
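// A toy version (own enum, not the real InstructionData) of the successor
// visit this function's documentation describes: the closure receives each
// target block plus a flag saying whether it was reached through a
// branch-table entry. The table's default block is reported first and is not
// "from the table", matching the ordering explained in the comments below.
type Block = u32;

enum Terminator {
    Jump(Block),
    Brif { then_block: Block, else_block: Block },
    BrTable { default: Block, table: Vec<Block> },
}

fn visit_succs(term: &Terminator, mut visit: impl FnMut(Block, bool)) {
    match term {
        Terminator::Jump(dest) => visit(*dest, false),
        Terminator::Brif { then_block, else_block } => {
            visit(*then_block, false);
            visit(*else_block, false);
        }
        Terminator::BrTable { default, table } => {
            visit(*default, false); // default block first, reached by a direct branch
            for &dest in table {
                visit(dest, true); // table entries
            }
        }
    }
}

fn main() {
    let term = Terminator::BrTable { default: 9, table: vec![1, 2, 3] };
    let mut seen = Vec::new();
    visit_succs(&term, |block, from_table| seen.push((block, from_table)));
    assert_eq!(seen, vec![(9, false), (1, true), (2, true), (3, true)]);
}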
+pub(crate) fn visit_block_succs( + f: &Function, + block: Block, + mut visit: F, +) { + if let Some(inst) = f.layout.last_inst(block) { + match &f.dfg.insts[inst] { + ir::InstructionData::Jump { + destination: dest, .. + } => { + visit(inst, dest.block(&f.dfg.value_lists), false); + } + + ir::InstructionData::Brif { + blocks: [block_then, block_else], + .. + } => { + visit(inst, block_then.block(&f.dfg.value_lists), false); + visit(inst, block_else.block(&f.dfg.value_lists), false); + } + + ir::InstructionData::BranchTable { table, .. } => { + let pool = &f.dfg.value_lists; + let table = &f.stencil.dfg.jump_tables[*table]; + + // The default block is reached via a direct conditional branch, + // so it is not part of the table. We visit the default block + // first explicitly, to mirror the traversal order of + // `JumpTableData::all_branches`, and transitively the order of + // `InstructionData::branch_destination`. + // + // Additionally, this case is why we are unable to replace this + // whole function with a loop over `branch_destination`: we need + // to report which branch targets come from the table vs the + // default. + visit(inst, table.default_block().block(pool), false); + + for dest in table.as_slice() { + visit(inst, dest.block(pool), true); + } + } + + inst => debug_assert!(!inst.opcode().is_branch()), + } + } +} diff --git a/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/inst_specs.isle b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/inst_specs.isle new file mode 100644 index 000000000..83f703e45 --- /dev/null +++ b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/inst_specs.isle @@ -0,0 +1,242 @@ +(model Imm64 (type (bv 64))) + +(model IntCC (enum + (Equal #x00) + (NotEqual #x01) + (SignedGreaterThan #x02) + (SignedGreaterThanOrEqual #x03) + (SignedLessThan #x04) + (SignedLessThanOrEqual #x05) + (UnsignedGreaterThan #x06) + (UnsignedGreaterThanOrEqual #x07) + (UnsignedLessThan #x08) + (UnsignedLessThanOrEqual #x09))) + +(spec (smin x y) + (provide (= result (if (bvsle x y) x y)))) +(instantiate smin bv_binary_8_to_64) + +(spec (umin x y) + (provide (= result (if (bvule x y) x y)))) +(instantiate umin bv_binary_8_to_64) + +(spec (smax x y) + (provide (= result (if (bvsge x y) x y)))) +(instantiate smax bv_binary_8_to_64) + +(spec (umax x y) + (provide (= result (if (bvuge x y) x y)))) +(instantiate umax bv_binary_8_to_64) + +(spec (iconst arg) + (provide (= arg (zero_ext 64 result)))) +(instantiate iconst + ((args (bv 64)) (ret (bv 8)) (canon (bv 8))) + ((args (bv 64)) (ret (bv 16)) (canon (bv 16))) + ((args (bv 64)) (ret (bv 32)) (canon (bv 32))) + ((args (bv 64)) (ret (bv 64)) (canon (bv 64))) +) + +(spec (bitselect c x y) + (provide (= result (bvor (bvand c x) (bvand (bvnot c) y))))) +(instantiate bitselect bv_ternary_8_to_64) + +(spec (icmp c x y) + (provide + (= result + (switch c + ((IntCC.Equal) (if (= x y) #x01 #x00)) + ((IntCC.NotEqual) (if (not (= x y)) #x01 #x00)) + ((IntCC.SignedGreaterThan) (if (bvsgt x y) #x01 #x00)) + ((IntCC.SignedGreaterThanOrEqual) (if (bvsge x y) #x01 #x00)) + ((IntCC.SignedLessThan) (if (bvslt x y) #x01 #x00)) + ((IntCC.SignedLessThanOrEqual) (if (bvsle x y) #x01 #x00)) + ((IntCC.UnsignedGreaterThan) (if (bvugt x y) #x01 #x00)) + ((IntCC.UnsignedGreaterThanOrEqual) (if (bvuge x y) #x01 #x00)) + ((IntCC.UnsignedLessThan) (if (bvult x y) #x01 #x00)) + ((IntCC.UnsignedLessThanOrEqual) (if (bvule x y) #x01 #x00))))) + (require + ;; AVH TODO: if we understand enums semantically, we can generate this + (or + 
(= c (IntCC.Equal)) + (= c (IntCC.NotEqual)) + (= c (IntCC.UnsignedGreaterThanOrEqual)) + (= c (IntCC.UnsignedGreaterThan)) + (= c (IntCC.UnsignedLessThanOrEqual)) + (= c (IntCC.UnsignedLessThan)) + (= c (IntCC.SignedGreaterThanOrEqual)) + (= c (IntCC.SignedGreaterThan)) + (= c (IntCC.SignedLessThanOrEqual)) + (= c (IntCC.SignedLessThan))))) +(instantiate icmp + ((args (bv 8) (bv 8) (bv 8)) (ret (bv 8)) (canon (bv 8))) + ((args (bv 8) (bv 16) (bv 16)) (ret (bv 8)) (canon (bv 16))) + ((args (bv 8) (bv 32) (bv 32)) (ret (bv 8)) (canon (bv 32))) + ((args (bv 8) (bv 64) (bv 64)) (ret (bv 8)) (canon (bv 64))) +) + +(spec (iadd x y) + (provide (= result (bvadd x y)))) +(instantiate iadd bv_binary_8_to_64) + +(spec (isub x y) + (provide (= result (bvsub x y)))) +(instantiate isub bv_binary_8_to_64) + +(spec (ineg x) + (provide (= result (bvneg x)))) +(instantiate ineg bv_unary_8_to_64) + +(spec (iabs x) + (provide (= result + (if (bvsge x (conv_to (widthof x) #x0000000000000000)) + x + (bvneg x))))) +(instantiate iabs bv_unary_8_to_64) + +(spec (imul x y) + (provide (= result (bvmul x y)))) +(instantiate imul bv_binary_8_to_64) + +(spec (udiv x y) + (provide (= result (bvudiv x y))) + (require (not (= y (zero_ext (widthof y) #b0))))) +(instantiate udiv bv_binary_8_to_64) + +(spec (sdiv x y) + (provide (= result (bvsdiv x y))) + (require (not (= y (zero_ext (widthof y) #b0))))) +(instantiate sdiv bv_binary_8_to_64) + +(spec (urem x y) + (provide (= result (bvurem x y))) + (require (not (= y (zero_ext (widthof y) #b0))))) +(instantiate urem bv_binary_8_to_64) + +(spec (srem x y) + (provide (= result (bvsrem x y))) + (require (not (= y (zero_ext (widthof y) #b0))))) +(instantiate srem bv_binary_8_to_64) + +(spec (imul_imm x y) + (provide (= result (bvmul (sign_ext 64 x) y)))) + +(spec (band x y) + (provide (= result (bvand x y)))) +(instantiate band bv_binary_8_to_64) + +(spec (bor x y) + (provide (= result (bvor x y)))) +(instantiate bor bv_binary_8_to_64) + +(spec (bxor x y) + (provide (= result (bvxor x y)))) +(instantiate bxor bv_binary_8_to_64) + +(spec (bnot x) + (provide (= result (bvnot x))) + (require (or (= (widthof x) 8) (= (widthof x) 16) (= (widthof x) 32) (= (widthof x) 64)))) +(instantiate bnot bv_unary_8_to_64) + +(spec (band_not x y) + (provide (= result (bvand x (bvnot y))))) +(instantiate band_not bv_binary_8_to_64) + +(spec (rotl x y) + (provide (= result (rotl x y)))) +(instantiate rotl bv_binary_8_to_64) + +(spec (rotr x y) + (provide (= result (rotr x y)))) +(instantiate rotr bv_binary_8_to_64) + +;; fn shift_mask(&mut self, ty: Type) -> ImmLogic { +;; let mask = (ty.lane_bits() - 1) as u64; +;; ImmLogic::maybe_from_u64(mask, I32).unwrap() +;; } +(spec (ishl x y) + (provide + (= result + (bvshl x + (bvand (conv_to (widthof y) (bvsub (int2bv 64 (widthof y)) + #x0000000000000001)) + y))))) +(instantiate ishl bv_binary_8_to_64) + +(spec (ushr x y) + (provide + (= result + (bvlshr x + (bvand (conv_to (widthof y) (bvsub (int2bv 64 (widthof y)) + #x0000000000000001)) + y))))) +(instantiate ushr bv_binary_8_to_64) + +(spec (sshr x y) + (provide + (= result + (bvashr x + (bvand (conv_to (widthof y) (bvsub (int2bv 64 (widthof y)) + #x0000000000000001)) + y))))) +(instantiate sshr bv_binary_8_to_64) + +(spec (clz x) + (provide (= result (clz x)))) +(instantiate clz bv_unary_8_to_64) + +(spec (cls x) + (provide (= result (cls x)))) +(instantiate cls bv_unary_8_to_64) + +(spec (ctz x) + (provide (= result (clz (rev x))))) +(instantiate ctz bv_unary_8_to_64) + +(spec (popcnt x) + (provide 
(= result (popcnt x)))) +(instantiate popcnt bv_unary_8_to_64) + +(form extend + ((args (bv 8)) (ret (bv 8)) (canon (bv 8))) + ((args (bv 8)) (ret (bv 16)) (canon (bv 8))) + ((args (bv 8)) (ret (bv 32)) (canon (bv 8))) + ((args (bv 8)) (ret (bv 64)) (canon (bv 8))) + ((args (bv 16)) (ret (bv 16)) (canon (bv 16))) + ((args (bv 16)) (ret (bv 32)) (canon (bv 16))) + ((args (bv 16)) (ret (bv 64)) (canon (bv 16))) + ((args (bv 32)) (ret (bv 32)) (canon (bv 32))) + ((args (bv 32)) (ret (bv 64)) (canon (bv 32))) + ((args (bv 64)) (ret (bv 64)) (canon (bv 64))) +) + +(spec (uextend x) + (provide (= result (zero_ext (widthof result) x)))) +(instantiate uextend extend) + +(spec (sextend x) + (provide (= result (sign_ext (widthof result) x)))) +(instantiate sextend extend) + + +(form load + ((args (bv 16) (bv 64) (bv 32)) (ret (bv 8)) (canon (bv 8))) + ((args (bv 16) (bv 64) (bv 32)) (ret (bv 16)) (canon (bv 16))) + ((args (bv 16) (bv 64) (bv 32)) (ret (bv 32)) (canon (bv 32))) + ((args (bv 16) (bv 64) (bv 32)) (ret (bv 64)) (canon (bv 64))) +) +(spec (load flags val offset) + (provide + (= result (load_effect flags (widthof result) (bvadd val (sign_ext 64 offset)))))) +(instantiate load load) + +(form store + ((args (bv 16) (bv 8) (bv 64) (bv 32)) (ret Unit) (canon (bv 8))) + ((args (bv 16) (bv 16) (bv 64) (bv 32)) (ret Unit) (canon (bv 16))) + ((args (bv 16) (bv 32) (bv 64) (bv 32)) (ret Unit) (canon (bv 32))) + ((args (bv 16) (bv 64) (bv 64) (bv 32)) (ret Unit) (canon (bv 64))) +) +(spec (store flags val_to_store addr offset) + (provide + (= result (store_effect flags (widthof val_to_store) val_to_store (bvadd (zero_ext 64 addr) (sign_ext 64 offset)))))) +(instantiate store store) diff --git a/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/atomic_rmw_op.rs b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/atomic_rmw_op.rs new file mode 100644 index 000000000..0317fc00d --- /dev/null +++ b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/atomic_rmw_op.rs @@ -0,0 +1,104 @@ +/// Describes the arithmetic operation in an atomic memory read-modify-write operation. +use core::fmt::{self, Display, Formatter}; +use core::str::FromStr; +#[cfg(feature = "enable-serde")] +use serde_derive::{Deserialize, Serialize}; + +#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +/// Describes the arithmetic operation in an atomic memory read-modify-write operation. +pub enum AtomicRmwOp { + /// Add + Add, + /// Sub + Sub, + /// And + And, + /// Nand + Nand, + /// Or + Or, + /// Xor + Xor, + /// Exchange + Xchg, + /// Unsigned min + Umin, + /// Unsigned max + Umax, + /// Signed min + Smin, + /// Signed max + Smax, +} + +impl AtomicRmwOp { + /// Returns a slice with all supported [AtomicRmwOp]'s. 
+ pub fn all() -> &'static [AtomicRmwOp] { + &[ + AtomicRmwOp::Add, + AtomicRmwOp::Sub, + AtomicRmwOp::And, + AtomicRmwOp::Nand, + AtomicRmwOp::Or, + AtomicRmwOp::Xor, + AtomicRmwOp::Xchg, + AtomicRmwOp::Umin, + AtomicRmwOp::Umax, + AtomicRmwOp::Smin, + AtomicRmwOp::Smax, + ] + } +} + +impl Display for AtomicRmwOp { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + let s = match self { + AtomicRmwOp::Add => "add", + AtomicRmwOp::Sub => "sub", + AtomicRmwOp::And => "and", + AtomicRmwOp::Nand => "nand", + AtomicRmwOp::Or => "or", + AtomicRmwOp::Xor => "xor", + AtomicRmwOp::Xchg => "xchg", + AtomicRmwOp::Umin => "umin", + AtomicRmwOp::Umax => "umax", + AtomicRmwOp::Smin => "smin", + AtomicRmwOp::Smax => "smax", + }; + f.write_str(s) + } +} + +impl FromStr for AtomicRmwOp { + type Err = (); + fn from_str(s: &str) -> Result { + match s { + "add" => Ok(AtomicRmwOp::Add), + "sub" => Ok(AtomicRmwOp::Sub), + "and" => Ok(AtomicRmwOp::And), + "nand" => Ok(AtomicRmwOp::Nand), + "or" => Ok(AtomicRmwOp::Or), + "xor" => Ok(AtomicRmwOp::Xor), + "xchg" => Ok(AtomicRmwOp::Xchg), + "umin" => Ok(AtomicRmwOp::Umin), + "umax" => Ok(AtomicRmwOp::Umax), + "smin" => Ok(AtomicRmwOp::Smin), + "smax" => Ok(AtomicRmwOp::Smax), + _ => Err(()), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn roundtrip_parse() { + for op in AtomicRmwOp::all() { + let roundtripped = format!("{op}").parse::().unwrap(); + assert_eq!(*op, roundtripped); + } + } +} diff --git a/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/builder.rs b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/builder.rs new file mode 100644 index 000000000..6032f07f5 --- /dev/null +++ b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/builder.rs @@ -0,0 +1,282 @@ +//! Cranelift instruction builder. +//! +//! A `Builder` provides a convenient interface for inserting instructions into a Cranelift +//! function. Many of its methods are generated from the meta language instruction definitions. + +use crate::ir; +use crate::ir::instructions::InstructionFormat; +use crate::ir::types; +use crate::ir::{DataFlowGraph, InstructionData}; +use crate::ir::{Inst, Opcode, Type, Value}; + +/// Base trait for instruction builders. +/// +/// The `InstBuilderBase` trait provides the basic functionality required by the methods of the +/// generated `InstBuilder` trait. These methods should not normally be used directly. Use the +/// methods in the `InstBuilder` trait instead. +/// +/// Any data type that implements `InstBuilderBase` also gets all the methods of the `InstBuilder` +/// trait. +pub trait InstBuilderBase<'f>: Sized { + /// Get an immutable reference to the data flow graph that will hold the constructed + /// instructions. + fn data_flow_graph(&self) -> &DataFlowGraph; + /// Get a mutable reference to the data flow graph that will hold the constructed + /// instructions. + fn data_flow_graph_mut(&mut self) -> &mut DataFlowGraph; + + /// Insert an instruction and return a reference to it, consuming the builder. + /// + /// The result types may depend on a controlling type variable. For non-polymorphic + /// instructions with multiple results, pass `INVALID` for the `ctrl_typevar` argument. + fn build(self, data: InstructionData, ctrl_typevar: Type) -> (Inst, &'f mut DataFlowGraph); +} + +// Include trait code generated by `cranelift-codegen/meta/src/gen_inst.rs`. +// +// This file defines the `InstBuilder` trait as an extension of `InstBuilderBase` with methods per +// instruction format and per opcode. 
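// A tiny stand-in (simplified integers, not the real DataFlowGraph machinery)
// for the "reuse existing result values" idea behind `with_result` /
// `with_results` below: building an instruction normally allocates a fresh
// result value, but a builder carrying a reuse slot hands back the
// caller-provided value instead of allocating.
type Value = u32;

struct Dfg {
    next_value: Value,
}

impl Dfg {
    fn make_result(&mut self, reuse: Option<Value>) -> Value {
        match reuse {
            Some(v) => v, // reuse the detached value supplied by the caller
            None => {
                let v = self.next_value; // allocate a fresh result value
                self.next_value += 1;
                v
            }
        }
    }
}

struct Builder<'a> {
    dfg: &'a mut Dfg,
    reuse: Option<Value>,
}

impl<'a> Builder<'a> {
    fn with_result(mut self, v: Value) -> Self {
        self.reuse = Some(v);
        self
    }

    fn build(self) -> Value {
        self.dfg.make_result(self.reuse)
    }
}

fn main() {
    let mut dfg = Dfg { next_value: 10 };
    let fresh = Builder { dfg: &mut dfg, reuse: None }.build();
    let reused = Builder { dfg: &mut dfg, reuse: None }.with_result(3).build();
    assert_eq!(fresh, 10); // newly allocated result
    assert_eq!(reused, 3); // reused the provided value
}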
+include!(concat!(env!("OUT_DIR"), "/inst_builder.rs")); + +/// Any type implementing `InstBuilderBase` gets all the `InstBuilder` methods for free. +impl<'f, T: InstBuilderBase<'f>> InstBuilder<'f> for T {} + +/// Base trait for instruction inserters. +/// +/// This is an alternative base trait for an instruction builder to implement. +/// +/// An instruction inserter can be adapted into an instruction builder by wrapping it in an +/// `InsertBuilder`. This provides some common functionality for instruction builders that insert +/// new instructions, as opposed to the `ReplaceBuilder` which overwrites existing instructions. +pub trait InstInserterBase<'f>: Sized { + /// Get an immutable reference to the data flow graph. + fn data_flow_graph(&self) -> &DataFlowGraph; + + /// Get a mutable reference to the data flow graph. + fn data_flow_graph_mut(&mut self) -> &mut DataFlowGraph; + + /// Insert a new instruction which belongs to the DFG. + fn insert_built_inst(self, inst: Inst) -> &'f mut DataFlowGraph; +} + +use core::marker::PhantomData; + +/// Builder that inserts an instruction at the current position. +/// +/// An `InsertBuilder` is a wrapper for an `InstInserterBase` that turns it into an instruction +/// builder with some additional facilities for creating instructions that reuse existing values as +/// their results. +pub struct InsertBuilder<'f, IIB: InstInserterBase<'f>> { + inserter: IIB, + unused: PhantomData<&'f u32>, +} + +impl<'f, IIB: InstInserterBase<'f>> InsertBuilder<'f, IIB> { + /// Create a new builder which inserts instructions at `pos`. + /// The `dfg` and `pos.layout` references should be from the same `Function`. + pub fn new(inserter: IIB) -> Self { + Self { + inserter, + unused: PhantomData, + } + } + + /// Reuse result values in `reuse`. + /// + /// Convert this builder into one that will reuse the provided result values instead of + /// allocating new ones. The provided values for reuse must not be attached to anything. Any + /// missing result values will be allocated as normal. + /// + /// The `reuse` argument is expected to be an array of `Option`. + pub fn with_results(self, reuse: Array) -> InsertReuseBuilder<'f, IIB, Array> + where + Array: AsRef<[Option]>, + { + InsertReuseBuilder { + inserter: self.inserter, + reuse, + unused: PhantomData, + } + } + + /// Reuse a single result value. + /// + /// Convert this into a builder that will reuse `v` as the single result value. The reused + /// result value `v` must not be attached to anything. + /// + /// This method should only be used when building an instruction with exactly one result. Use + /// `with_results()` for the more general case. + pub fn with_result(self, v: Value) -> InsertReuseBuilder<'f, IIB, [Option; 1]> { + // TODO: Specialize this to return a different builder that just attaches `v` instead of + // calling `make_inst_results_reusing()`. 
+ self.with_results([Some(v)]) + } +} + +impl<'f, IIB: InstInserterBase<'f>> InstBuilderBase<'f> for InsertBuilder<'f, IIB> { + fn data_flow_graph(&self) -> &DataFlowGraph { + self.inserter.data_flow_graph() + } + + fn data_flow_graph_mut(&mut self) -> &mut DataFlowGraph { + self.inserter.data_flow_graph_mut() + } + + fn build(mut self, data: InstructionData, ctrl_typevar: Type) -> (Inst, &'f mut DataFlowGraph) { + let inst; + { + let dfg = self.inserter.data_flow_graph_mut(); + inst = dfg.make_inst(data); + dfg.make_inst_results(inst, ctrl_typevar); + } + (inst, self.inserter.insert_built_inst(inst)) + } +} + +/// Builder that inserts a new instruction like `InsertBuilder`, but reusing result values. +pub struct InsertReuseBuilder<'f, IIB, Array> +where + IIB: InstInserterBase<'f>, + Array: AsRef<[Option]>, +{ + inserter: IIB, + reuse: Array, + unused: PhantomData<&'f u32>, +} + +impl<'f, IIB, Array> InstBuilderBase<'f> for InsertReuseBuilder<'f, IIB, Array> +where + IIB: InstInserterBase<'f>, + Array: AsRef<[Option]>, +{ + fn data_flow_graph(&self) -> &DataFlowGraph { + self.inserter.data_flow_graph() + } + + fn data_flow_graph_mut(&mut self) -> &mut DataFlowGraph { + self.inserter.data_flow_graph_mut() + } + + fn build(mut self, data: InstructionData, ctrl_typevar: Type) -> (Inst, &'f mut DataFlowGraph) { + let inst; + { + let dfg = self.inserter.data_flow_graph_mut(); + inst = dfg.make_inst(data); + // Make an `Iterator>`. + let ru = self.reuse.as_ref().iter().cloned(); + dfg.make_inst_results_reusing(inst, ctrl_typevar, ru); + } + (inst, self.inserter.insert_built_inst(inst)) + } +} + +/// Instruction builder that replaces an existing instruction. +/// +/// The inserted instruction will have the same `Inst` number as the old one. +/// +/// If the old instruction still has result values attached, it is assumed that the new instruction +/// produces the same number and types of results. The old result values are preserved. If the +/// replacement instruction format does not support multiple results, the builder panics. It is a +/// bug to leave result values dangling. +pub struct ReplaceBuilder<'f> { + dfg: &'f mut DataFlowGraph, + inst: Inst, +} + +impl<'f> ReplaceBuilder<'f> { + /// Create a `ReplaceBuilder` that will overwrite `inst`. + pub fn new(dfg: &'f mut DataFlowGraph, inst: Inst) -> Self { + Self { dfg, inst } + } +} + +impl<'f> InstBuilderBase<'f> for ReplaceBuilder<'f> { + fn data_flow_graph(&self) -> &DataFlowGraph { + self.dfg + } + + fn data_flow_graph_mut(&mut self) -> &mut DataFlowGraph { + self.dfg + } + + fn build(self, data: InstructionData, ctrl_typevar: Type) -> (Inst, &'f mut DataFlowGraph) { + // Splat the new instruction on top of the old one. + self.dfg.insts[self.inst] = data; + + if !self.dfg.has_results(self.inst) { + // The old result values were either detached or non-existent. + // Construct new ones. + self.dfg.make_inst_results(self.inst, ctrl_typevar); + } + + (self.inst, self.dfg) + } +} + +#[cfg(test)] +mod tests { + use crate::cursor::{Cursor, FuncCursor}; + use crate::ir::condcodes::*; + use crate::ir::types::*; + use crate::ir::{Function, InstBuilder, ValueDef}; + + #[test] + fn types() { + let mut func = Function::new(); + let block0 = func.dfg.make_block(); + let arg0 = func.dfg.append_block_param(block0, I32); + let mut pos = FuncCursor::new(&mut func); + pos.insert_block(block0); + + // Explicit types. + let v0 = pos.ins().iconst(I32, 3); + assert_eq!(pos.func.dfg.value_type(v0), I32); + + // Inferred from inputs. 
+ let v1 = pos.ins().iadd(arg0, v0); + assert_eq!(pos.func.dfg.value_type(v1), I32); + + // Formula. + let cmp = pos.ins().icmp(IntCC::Equal, arg0, v0); + assert_eq!(pos.func.dfg.value_type(cmp), I8); + } + + #[test] + fn reuse_results() { + let mut func = Function::new(); + let block0 = func.dfg.make_block(); + let arg0 = func.dfg.append_block_param(block0, I32); + let mut pos = FuncCursor::new(&mut func); + pos.insert_block(block0); + + let v0 = pos.ins().iadd_imm(arg0, 17); + assert_eq!(pos.func.dfg.value_type(v0), I32); + let iadd = pos.prev_inst().unwrap(); + assert_eq!(pos.func.dfg.value_def(v0), ValueDef::Result(iadd, 0)); + + // Detach v0 and reuse it for a different instruction. + pos.func.dfg.clear_results(iadd); + let v0b = pos.ins().with_result(v0).iconst(I32, 3); + assert_eq!(v0, v0b); + assert_eq!(pos.current_inst(), Some(iadd)); + let iconst = pos.prev_inst().unwrap(); + assert!(iadd != iconst); + assert_eq!(pos.func.dfg.value_def(v0), ValueDef::Result(iconst, 0)); + } + + #[test] + #[should_panic] + #[cfg(debug_assertions)] + fn panics_when_inserting_wrong_opcode() { + use crate::ir::{Opcode, TrapCode}; + + let mut func = Function::new(); + let block0 = func.dfg.make_block(); + let mut pos = FuncCursor::new(&mut func); + pos.insert_block(block0); + + // We are trying to create a Opcode::Return with the InstData::Trap, which is obviously wrong + pos.ins() + .Trap(Opcode::Return, I32, TrapCode::BAD_CONVERSION_TO_INTEGER); + } +} diff --git a/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/condcodes.rs b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/condcodes.rs new file mode 100644 index 000000000..e791649bb --- /dev/null +++ b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/condcodes.rs @@ -0,0 +1,404 @@ +//! Condition codes for the Cranelift code generator. +//! +//! A condition code here is an enumerated type that determined how to compare two numbers. There +//! are different rules for comparing integers and floating point numbers, so they use different +//! condition codes. + +use core::fmt::{self, Display, Formatter}; +use core::str::FromStr; + +#[cfg(feature = "enable-serde")] +use serde_derive::{Deserialize, Serialize}; + +/// Common traits of condition codes. +pub trait CondCode: Copy { + /// Get the complemented condition code of `self`. + /// + /// The complemented condition code produces the opposite result for all comparisons. + /// That is, `cmp CC, x, y` is true if and only if `cmp CC.complement(), x, y` is false. + #[must_use] + fn complement(self) -> Self; + + /// Get the swapped args condition code for `self`. + /// + /// The swapped args condition code produces the same result as swapping `x` and `y` in the + /// comparison. That is, `cmp CC, x, y` is the same as `cmp CC.swap_args(), y, x`. + #[must_use] + fn swap_args(self) -> Self; +} + +/// Condition code for comparing integers. +/// +/// This condition code is used by the `icmp` instruction to compare integer values. There are +/// separate codes for comparing the integers as signed or unsigned numbers where it makes a +/// difference. +#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub enum IntCC { + /// `==`. + Equal, + /// `!=`. + NotEqual, + /// Signed `<`. + SignedLessThan, + /// Signed `>=`. + SignedGreaterThanOrEqual, + /// Signed `>`. + SignedGreaterThan, + /// Signed `<=`. + SignedLessThanOrEqual, + /// Unsigned `<`. + UnsignedLessThan, + /// Unsigned `>=`. 
+ UnsignedGreaterThanOrEqual, + /// Unsigned `>`. + UnsignedGreaterThan, + /// Unsigned `<=`. + UnsignedLessThanOrEqual, +} + +impl CondCode for IntCC { + fn complement(self) -> Self { + use self::IntCC::*; + match self { + Equal => NotEqual, + NotEqual => Equal, + SignedLessThan => SignedGreaterThanOrEqual, + SignedGreaterThanOrEqual => SignedLessThan, + SignedGreaterThan => SignedLessThanOrEqual, + SignedLessThanOrEqual => SignedGreaterThan, + UnsignedLessThan => UnsignedGreaterThanOrEqual, + UnsignedGreaterThanOrEqual => UnsignedLessThan, + UnsignedGreaterThan => UnsignedLessThanOrEqual, + UnsignedLessThanOrEqual => UnsignedGreaterThan, + } + } + + fn swap_args(self) -> Self { + use self::IntCC::*; + match self { + Equal => Equal, + NotEqual => NotEqual, + SignedGreaterThan => SignedLessThan, + SignedGreaterThanOrEqual => SignedLessThanOrEqual, + SignedLessThan => SignedGreaterThan, + SignedLessThanOrEqual => SignedGreaterThanOrEqual, + UnsignedGreaterThan => UnsignedLessThan, + UnsignedGreaterThanOrEqual => UnsignedLessThanOrEqual, + UnsignedLessThan => UnsignedGreaterThan, + UnsignedLessThanOrEqual => UnsignedGreaterThanOrEqual, + } + } +} + +impl IntCC { + /// Returns a slice with all possible [IntCC] values. + pub fn all() -> &'static [IntCC] { + &[ + IntCC::Equal, + IntCC::NotEqual, + IntCC::SignedLessThan, + IntCC::SignedGreaterThanOrEqual, + IntCC::SignedGreaterThan, + IntCC::SignedLessThanOrEqual, + IntCC::UnsignedLessThan, + IntCC::UnsignedGreaterThanOrEqual, + IntCC::UnsignedGreaterThan, + IntCC::UnsignedLessThanOrEqual, + ] + } + + /// Get the corresponding IntCC with the equal component removed. + /// For conditions without a zero component, this is a no-op. + pub fn without_equal(self) -> Self { + use self::IntCC::*; + match self { + SignedGreaterThan | SignedGreaterThanOrEqual => SignedGreaterThan, + SignedLessThan | SignedLessThanOrEqual => SignedLessThan, + UnsignedGreaterThan | UnsignedGreaterThanOrEqual => UnsignedGreaterThan, + UnsignedLessThan | UnsignedLessThanOrEqual => UnsignedLessThan, + _ => self, + } + } + + /// Get the corresponding IntCC with the signed component removed. + /// For conditions without a signed component, this is a no-op. + pub fn unsigned(self) -> Self { + use self::IntCC::*; + match self { + SignedGreaterThan | UnsignedGreaterThan => UnsignedGreaterThan, + SignedGreaterThanOrEqual | UnsignedGreaterThanOrEqual => UnsignedGreaterThanOrEqual, + SignedLessThan | UnsignedLessThan => UnsignedLessThan, + SignedLessThanOrEqual | UnsignedLessThanOrEqual => UnsignedLessThanOrEqual, + _ => self, + } + } + + /// Get the corresponding string condition code for the IntCC object. 
+ pub fn to_static_str(self) -> &'static str { + use self::IntCC::*; + match self { + Equal => "eq", + NotEqual => "ne", + SignedGreaterThan => "sgt", + SignedGreaterThanOrEqual => "sge", + SignedLessThan => "slt", + SignedLessThanOrEqual => "sle", + UnsignedGreaterThan => "ugt", + UnsignedGreaterThanOrEqual => "uge", + UnsignedLessThan => "ult", + UnsignedLessThanOrEqual => "ule", + } + } +} + +impl Display for IntCC { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + f.write_str(self.to_static_str()) + } +} + +impl FromStr for IntCC { + type Err = (); + + fn from_str(s: &str) -> Result { + use self::IntCC::*; + match s { + "eq" => Ok(Equal), + "ne" => Ok(NotEqual), + "sge" => Ok(SignedGreaterThanOrEqual), + "sgt" => Ok(SignedGreaterThan), + "sle" => Ok(SignedLessThanOrEqual), + "slt" => Ok(SignedLessThan), + "uge" => Ok(UnsignedGreaterThanOrEqual), + "ugt" => Ok(UnsignedGreaterThan), + "ule" => Ok(UnsignedLessThanOrEqual), + "ult" => Ok(UnsignedLessThan), + _ => Err(()), + } + } +} + +/// Condition code for comparing floating point numbers. +/// +/// This condition code is used by the `fcmp` instruction to compare floating point values. Two +/// IEEE floating point values relate in exactly one of four ways: +/// +/// 1. `UN` - unordered when either value is NaN. +/// 2. `EQ` - equal numerical value. +/// 3. `LT` - `x` is less than `y`. +/// 4. `GT` - `x` is greater than `y`. +/// +/// Note that `0.0` and `-0.0` relate as `EQ` because they both represent the number 0. +/// +/// The condition codes described here are used to produce a single boolean value from the +/// comparison. The 14 condition codes here cover every possible combination of the relation above +/// except the impossible `!UN & !EQ & !LT & !GT` and the always true `UN | EQ | LT | GT`. +#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub enum FloatCC { + /// EQ | LT | GT + Ordered, + /// UN + Unordered, + + /// EQ + Equal, + /// The C '!=' operator is the inverse of '==': `NotEqual`. + /// UN | LT | GT + NotEqual, + /// LT | GT + OrderedNotEqual, + /// UN | EQ + UnorderedOrEqual, + + /// LT + LessThan, + /// LT | EQ + LessThanOrEqual, + /// GT + GreaterThan, + /// GT | EQ + GreaterThanOrEqual, + + /// UN | LT + UnorderedOrLessThan, + /// UN | LT | EQ + UnorderedOrLessThanOrEqual, + /// UN | GT + UnorderedOrGreaterThan, + /// UN | GT | EQ + UnorderedOrGreaterThanOrEqual, +} + +impl FloatCC { + /// Returns a slice with all possible [FloatCC] values. 
+ pub fn all() -> &'static [FloatCC] { + &[ + FloatCC::Ordered, + FloatCC::Unordered, + FloatCC::Equal, + FloatCC::NotEqual, + FloatCC::OrderedNotEqual, + FloatCC::UnorderedOrEqual, + FloatCC::LessThan, + FloatCC::LessThanOrEqual, + FloatCC::GreaterThan, + FloatCC::GreaterThanOrEqual, + FloatCC::UnorderedOrLessThan, + FloatCC::UnorderedOrLessThanOrEqual, + FloatCC::UnorderedOrGreaterThan, + FloatCC::UnorderedOrGreaterThanOrEqual, + ] + } +} + +impl CondCode for FloatCC { + fn complement(self) -> Self { + use self::FloatCC::*; + match self { + Ordered => Unordered, + Unordered => Ordered, + Equal => NotEqual, + NotEqual => Equal, + OrderedNotEqual => UnorderedOrEqual, + UnorderedOrEqual => OrderedNotEqual, + LessThan => UnorderedOrGreaterThanOrEqual, + LessThanOrEqual => UnorderedOrGreaterThan, + GreaterThan => UnorderedOrLessThanOrEqual, + GreaterThanOrEqual => UnorderedOrLessThan, + UnorderedOrLessThan => GreaterThanOrEqual, + UnorderedOrLessThanOrEqual => GreaterThan, + UnorderedOrGreaterThan => LessThanOrEqual, + UnorderedOrGreaterThanOrEqual => LessThan, + } + } + fn swap_args(self) -> Self { + use self::FloatCC::*; + match self { + Ordered => Ordered, + Unordered => Unordered, + Equal => Equal, + NotEqual => NotEqual, + OrderedNotEqual => OrderedNotEqual, + UnorderedOrEqual => UnorderedOrEqual, + LessThan => GreaterThan, + LessThanOrEqual => GreaterThanOrEqual, + GreaterThan => LessThan, + GreaterThanOrEqual => LessThanOrEqual, + UnorderedOrLessThan => UnorderedOrGreaterThan, + UnorderedOrLessThanOrEqual => UnorderedOrGreaterThanOrEqual, + UnorderedOrGreaterThan => UnorderedOrLessThan, + UnorderedOrGreaterThanOrEqual => UnorderedOrLessThanOrEqual, + } + } +} + +impl Display for FloatCC { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + use self::FloatCC::*; + f.write_str(match *self { + Ordered => "ord", + Unordered => "uno", + Equal => "eq", + NotEqual => "ne", + OrderedNotEqual => "one", + UnorderedOrEqual => "ueq", + LessThan => "lt", + LessThanOrEqual => "le", + GreaterThan => "gt", + GreaterThanOrEqual => "ge", + UnorderedOrLessThan => "ult", + UnorderedOrLessThanOrEqual => "ule", + UnorderedOrGreaterThan => "ugt", + UnorderedOrGreaterThanOrEqual => "uge", + }) + } +} + +impl FromStr for FloatCC { + type Err = (); + + fn from_str(s: &str) -> Result { + use self::FloatCC::*; + match s { + "ord" => Ok(Ordered), + "uno" => Ok(Unordered), + "eq" => Ok(Equal), + "ne" => Ok(NotEqual), + "one" => Ok(OrderedNotEqual), + "ueq" => Ok(UnorderedOrEqual), + "lt" => Ok(LessThan), + "le" => Ok(LessThanOrEqual), + "gt" => Ok(GreaterThan), + "ge" => Ok(GreaterThanOrEqual), + "ult" => Ok(UnorderedOrLessThan), + "ule" => Ok(UnorderedOrLessThanOrEqual), + "ugt" => Ok(UnorderedOrGreaterThan), + "uge" => Ok(UnorderedOrGreaterThanOrEqual), + _ => Err(()), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::string::ToString; + + #[test] + fn int_complement() { + for r in IntCC::all() { + let cc = *r; + let inv = cc.complement(); + assert!(cc != inv); + assert_eq!(inv.complement(), cc); + } + } + + #[test] + fn int_swap_args() { + for r in IntCC::all() { + let cc = *r; + let rev = cc.swap_args(); + assert_eq!(rev.swap_args(), cc); + } + } + + #[test] + fn int_display() { + for r in IntCC::all() { + let cc = *r; + assert_eq!(cc.to_string().parse(), Ok(cc)); + } + assert_eq!("bogus".parse::(), Err(())); + } + + #[test] + fn float_complement() { + for r in FloatCC::all() { + let cc = *r; + let inv = cc.complement(); + assert!(cc != inv); + assert_eq!(inv.complement(), cc); + } + } + + 
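// Editor's aside (not part of the upstream cranelift-codegen sources): a minimal,
// hedged sketch of a test exercising the `IntCC::without_equal`, `IntCC::unsigned`,
// and string round-trip helpers defined earlier in this file, which the existing
// tests in this module do not cover. It assumes it sits inside this `tests` module
// so that `super::IntCC` resolves; the expected values follow directly from the
// match arms shown above.
#[test]
fn int_without_equal_and_unsigned_sketch() {
    use super::IntCC;

    // Dropping the "or equal" component of a comparison.
    assert_eq!(
        IntCC::SignedGreaterThanOrEqual.without_equal(),
        IntCC::SignedGreaterThan
    );
    assert_eq!(
        IntCC::UnsignedLessThanOrEqual.without_equal(),
        IntCC::UnsignedLessThan
    );
    // Conditions without an equality component are returned unchanged.
    assert_eq!(IntCC::Equal.without_equal(), IntCC::Equal);

    // Dropping the signedness of a comparison.
    assert_eq!(
        IntCC::SignedGreaterThan.unsigned(),
        IntCC::UnsignedGreaterThan
    );
    assert_eq!(IntCC::Equal.unsigned(), IntCC::Equal);

    // Every condition code round-trips through its textual form.
    for cc in IntCC::all() {
        assert_eq!(cc.to_static_str().parse::<IntCC>(), Ok(*cc));
    }
}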
#[test] + fn float_swap_args() { + for r in FloatCC::all() { + let cc = *r; + let rev = cc.swap_args(); + assert_eq!(rev.swap_args(), cc); + } + } + + #[test] + fn float_display() { + for r in FloatCC::all() { + let cc = *r; + assert_eq!(cc.to_string().parse(), Ok(cc)); + } + assert_eq!("bogus".parse::(), Err(())); + } +} diff --git a/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/constant.rs b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/constant.rs new file mode 100644 index 000000000..0db31628c --- /dev/null +++ b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/constant.rs @@ -0,0 +1,485 @@ +//! Constants +//! +//! The constant pool defined here allows Cranelift to avoid emitting the same constant multiple +//! times. As constants are inserted in the pool, a handle is returned; the handle is a Cranelift +//! Entity. Inserting the same data multiple times will always return the same handle. +//! +//! Future work could include: +//! - ensuring alignment of constants within the pool, +//! - bucketing constants by size. + +use crate::ir::immediates::{Ieee128, IntoBytes, V128Imm}; +use crate::ir::Constant; +use alloc::collections::BTreeMap; +use alloc::vec::Vec; +use core::fmt; +use core::slice::Iter; +use core::str::{from_utf8, FromStr}; +use cranelift_entity::EntityRef; + +#[cfg(feature = "enable-serde")] +use serde_derive::{Deserialize, Serialize}; + +/// This type describes the actual constant data. Note that the bytes stored in this structure are +/// expected to be in little-endian order; this is due to ease-of-use when interacting with +/// WebAssembly values, which are [little-endian by design]. +/// +/// [little-endian by design]: https://github.com/WebAssembly/design/blob/master/Portability.md +#[derive(Clone, Hash, Eq, PartialEq, Debug, Default, PartialOrd, Ord)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub struct ConstantData(Vec); + +impl FromIterator for ConstantData { + fn from_iter>(iter: T) -> Self { + let v = iter.into_iter().collect(); + Self(v) + } +} + +impl From> for ConstantData { + fn from(v: Vec) -> Self { + Self(v) + } +} + +impl From<&[u8]> for ConstantData { + fn from(v: &[u8]) -> Self { + Self(v.to_vec()) + } +} + +impl From for ConstantData { + fn from(v: V128Imm) -> Self { + Self(v.to_vec()) + } +} + +impl From for ConstantData { + fn from(v: Ieee128) -> Self { + Self(v.into_bytes()) + } +} + +impl TryFrom<&ConstantData> for Ieee128 { + type Error = <[u8; 16] as TryFrom<&'static [u8]>>::Error; + + fn try_from(value: &ConstantData) -> Result { + Ok(Ieee128::with_bits(u128::from_le_bytes( + value.as_slice().try_into()?, + ))) + } +} + +impl ConstantData { + /// Return the number of bytes in the constant. + pub fn len(&self) -> usize { + self.0.len() + } + + /// Check if the constant contains any bytes. + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + + /// Return the data as a slice. + pub fn as_slice(&self) -> &[u8] { + self.0.as_slice() + } + + /// Convert the data to a vector. + pub fn into_vec(self) -> Vec { + self.0 + } + + /// Iterate over the constant's bytes. + pub fn iter(&self) -> Iter { + self.0.iter() + } + + /// Add new bytes to the constant data. + pub fn append(mut self, bytes: impl IntoBytes) -> Self { + let mut to_add = bytes.into_bytes(); + self.0.append(&mut to_add); + self + } + + /// Expand the size of the constant data to `expected_size` number of bytes by adding zeroes + /// in the high-order byte slots. 
+ pub fn expand_to(mut self, expected_size: usize) -> Self { + if self.len() > expected_size { + panic!("The constant data is already expanded beyond {expected_size} bytes") + } + self.0.resize(expected_size, 0); + self + } +} + +impl fmt::Display for ConstantData { + /// Print the constant data in hexadecimal format, e.g. 0x000102030405060708090a0b0c0d0e0f. + /// This function will flip the stored order of bytes--little-endian--to the more readable + /// big-endian ordering. + /// + /// ``` + /// use cranelift_codegen::ir::ConstantData; + /// let data = ConstantData::from([3, 2, 1, 0, 0].as_ref()); // note the little-endian order + /// assert_eq!(data.to_string(), "0x0000010203"); + /// ``` + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + if !self.is_empty() { + write!(f, "0x")?; + for b in self.0.iter().rev() { + write!(f, "{b:02x}")?; + } + } + Ok(()) + } +} + +impl FromStr for ConstantData { + type Err = &'static str; + + /// Parse a hexadecimal string to `ConstantData`. This is the inverse of `Display::fmt`. + /// + /// ``` + /// use cranelift_codegen::ir::ConstantData; + /// let c: ConstantData = "0x000102".parse().unwrap(); + /// assert_eq!(c.into_vec(), [2, 1, 0]); + /// ``` + fn from_str(s: &str) -> Result { + if s.len() <= 2 || &s[0..2] != "0x" { + return Err("Expected a hexadecimal string, e.g. 0x1234"); + } + + // clean and check the string + let cleaned: Vec = s[2..] + .as_bytes() + .iter() + .filter(|&&b| b as char != '_') + .cloned() + .collect(); // remove 0x prefix and any intervening _ characters + + if cleaned.is_empty() { + Err("Hexadecimal string must have some digits") + } else if cleaned.len() % 2 != 0 { + Err("Hexadecimal string must have an even number of digits") + } else if cleaned.len() > 32 { + Err("Hexadecimal string has too many digits to fit in a 128-bit vector") + } else { + let mut buffer = Vec::with_capacity((s.len() - 2) / 2); + for i in (0..cleaned.len()).step_by(2) { + let pair = from_utf8(&cleaned[i..i + 2]) + .or_else(|_| Err("Unable to parse hexadecimal pair as UTF-8"))?; + let byte = u8::from_str_radix(pair, 16) + .or_else(|_| Err("Unable to parse as hexadecimal"))?; + buffer.insert(0, byte); + } + Ok(Self(buffer)) + } + } +} + +/// Maintains the mapping between a constant handle (i.e. [`Constant`]) and +/// its constant data (i.e. [`ConstantData`]). +#[derive(Clone, PartialEq, Hash)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub struct ConstantPool { + /// This mapping maintains the insertion order as long as Constants are created with + /// sequentially increasing integers. + /// + /// It is important that, by construction, no entry in that list gets removed. If that ever + /// need to happen, don't forget to update the `Constant` generation scheme. + handles_to_values: BTreeMap, + + /// Mapping of hashed `ConstantData` to the index into the other hashmap. + /// + /// This allows for deduplication of entries into the `handles_to_values` mapping. + values_to_handles: BTreeMap, +} + +impl ConstantPool { + /// Create a new constant pool instance. + pub fn new() -> Self { + Self { + handles_to_values: BTreeMap::new(), + values_to_handles: BTreeMap::new(), + } + } + + /// Empty the constant pool of all data. 
+    pub fn clear(&mut self) {
+        self.handles_to_values.clear();
+        self.values_to_handles.clear();
+    }
+
+    /// Insert constant data into the pool, returning a handle for later referencing; when constant
+    /// data is inserted that is a duplicate of previous constant data, the existing handle will be
+    /// returned.
+    pub fn insert(&mut self, constant_value: ConstantData) -> Constant {
+        if let Some(cst) = self.values_to_handles.get(&constant_value) {
+            return *cst;
+        }
+
+        let constant_handle = Constant::new(self.len());
+        self.set(constant_handle, constant_value);
+        constant_handle
+    }
+
+    /// Retrieve the constant data given a handle.
+    pub fn get(&self, constant_handle: Constant) -> &ConstantData {
+        assert!(self.handles_to_values.contains_key(&constant_handle));
+        self.handles_to_values.get(&constant_handle).unwrap()
+    }
+
+    /// Link a constant handle to its value. This does not de-duplicate data but does avoid
+    /// replacing any existing constant values. Use `set` to tie a specific `const42` to its value;
+    /// use `insert` to add a value and return the next available `const` entity.
+    pub fn set(&mut self, constant_handle: Constant, constant_value: ConstantData) {
+        let replaced = self
+            .handles_to_values
+            .insert(constant_handle, constant_value.clone());
+        assert!(
+            replaced.is_none(),
+            "attempted to overwrite an existing constant {:?}: {:?} => {:?}",
+            constant_handle,
+            &constant_value,
+            replaced.unwrap()
+        );
+        self.values_to_handles
+            .insert(constant_value, constant_handle);
+    }
+
+    /// Iterate over the constants in insertion order.
+    pub fn iter(&self) -> impl Iterator<Item = (&Constant, &ConstantData)> {
+        self.handles_to_values.iter()
+    }
+
+    /// Iterate over mutable entries in the constant pool in insertion order.
+    pub fn entries_mut(&mut self) -> impl Iterator<Item = &mut ConstantData> {
+        self.handles_to_values.values_mut()
+    }
+
+    /// Return the number of constants in the pool.
+    pub fn len(&self) -> usize {
+        self.handles_to_values.len()
+    }
+
+    /// Return the combined size of all of the constant values in the pool.
+ pub fn byte_size(&self) -> usize { + self.handles_to_values.values().map(|c| c.len()).sum() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::string::ToString; + + #[test] + fn empty() { + let sut = ConstantPool::new(); + assert_eq!(sut.len(), 0); + } + + #[test] + fn insert() { + let mut sut = ConstantPool::new(); + sut.insert(vec![1, 2, 3].into()); + sut.insert(vec![4, 5, 6].into()); + assert_eq!(sut.len(), 2); + } + + #[test] + fn insert_duplicate() { + let mut sut = ConstantPool::new(); + let a = sut.insert(vec![1, 2, 3].into()); + sut.insert(vec![4, 5, 6].into()); + let b = sut.insert(vec![1, 2, 3].into()); + assert_eq!(a, b); + } + + #[test] + fn clear() { + let mut sut = ConstantPool::new(); + sut.insert(vec![1, 2, 3].into()); + assert_eq!(sut.len(), 1); + + sut.clear(); + assert_eq!(sut.len(), 0); + } + + #[test] + fn iteration_order() { + let mut sut = ConstantPool::new(); + sut.insert(vec![1, 2, 3].into()); + sut.insert(vec![4, 5, 6].into()); + sut.insert(vec![1, 2, 3].into()); + let data = sut.iter().map(|(_, v)| v).collect::>(); + assert_eq!(data, vec![&vec![1, 2, 3].into(), &vec![4, 5, 6].into()]); + } + + #[test] + fn get() { + let mut sut = ConstantPool::new(); + let data = vec![1, 2, 3]; + let handle = sut.insert(data.clone().into()); + assert_eq!(sut.get(handle), &data.into()); + } + + #[test] + fn set() { + let mut sut = ConstantPool::new(); + let handle = Constant::with_number(42).unwrap(); + let data = vec![1, 2, 3]; + sut.set(handle, data.clone().into()); + assert_eq!(sut.get(handle), &data.into()); + } + + #[test] + #[should_panic] + fn disallow_overwriting_constant() { + let mut sut = ConstantPool::new(); + let handle = Constant::with_number(42).unwrap(); + sut.set(handle, vec![].into()); + sut.set(handle, vec![1].into()); + } + + #[test] + #[should_panic] + fn get_nonexistent_constant() { + let sut = ConstantPool::new(); + let a = Constant::with_number(42).unwrap(); + sut.get(a); // panics, only use constants returned by ConstantPool + } + + #[test] + fn display_constant_data() { + assert_eq!(ConstantData::from([0].as_ref()).to_string(), "0x00"); + assert_eq!(ConstantData::from([42].as_ref()).to_string(), "0x2a"); + assert_eq!( + ConstantData::from([3, 2, 1, 0].as_ref()).to_string(), + "0x00010203" + ); + assert_eq!( + ConstantData::from(3735928559u32.to_le_bytes().as_ref()).to_string(), + "0xdeadbeef" + ); + assert_eq!( + ConstantData::from(0x0102030405060708u64.to_le_bytes().as_ref()).to_string(), + "0x0102030405060708" + ); + } + + #[test] + fn iterate_over_constant_data() { + let c = ConstantData::from([1, 2, 3].as_ref()); + let mut iter = c.iter(); + assert_eq!(iter.next(), Some(&1)); + assert_eq!(iter.next(), Some(&2)); + assert_eq!(iter.next(), Some(&3)); + assert_eq!(iter.next(), None); + } + + #[test] + fn add_to_constant_data() { + let d = ConstantData::from([1, 2].as_ref()); + let e = d.append(i16::from(3u8)); + assert_eq!(e.into_vec(), vec![1, 2, 3, 0]) + } + + #[test] + fn extend_constant_data() { + let d = ConstantData::from([1, 2].as_ref()); + assert_eq!(d.expand_to(4).into_vec(), vec![1, 2, 0, 0]) + } + + #[test] + #[should_panic] + fn extend_constant_data_to_invalid_length() { + ConstantData::from([1, 2].as_ref()).expand_to(1); + } + + #[test] + fn parse_constant_data_and_restringify() { + // Verify that parsing of `from` succeeds and stringifies to `to`. + fn parse_ok(from: &str, to: &str) { + let parsed = from.parse::().unwrap(); + assert_eq!(parsed.to_string(), to); + } + + // Verify that parsing of `from` fails with `error_msg`. 
+ fn parse_err(from: &str, error_msg: &str) { + let parsed = from.parse::(); + assert!( + parsed.is_err(), + "Expected a parse error but parsing succeeded: {from}" + ); + assert_eq!(parsed.err().unwrap(), error_msg); + } + + parse_ok("0x00", "0x00"); + parse_ok("0x00000042", "0x00000042"); + parse_ok( + "0x0102030405060708090a0b0c0d0e0f00", + "0x0102030405060708090a0b0c0d0e0f00", + ); + parse_ok("0x_0000_0043_21", "0x0000004321"); + + parse_err("", "Expected a hexadecimal string, e.g. 0x1234"); + parse_err("0x", "Expected a hexadecimal string, e.g. 0x1234"); + parse_err( + "0x042", + "Hexadecimal string must have an even number of digits", + ); + parse_err( + "0x00000000000000000000000000000000000000000000000000", + "Hexadecimal string has too many digits to fit in a 128-bit vector", + ); + parse_err("0xrstu", "Unable to parse as hexadecimal"); + parse_err("0x__", "Hexadecimal string must have some digits"); + } + + #[test] + fn verify_stored_bytes_in_constant_data() { + assert_eq!("0x01".parse::().unwrap().into_vec(), [1]); + assert_eq!(ConstantData::from([1, 0].as_ref()).0, [1, 0]); + assert_eq!(ConstantData::from(vec![1, 0, 0, 0]).0, [1, 0, 0, 0]); + } + + #[test] + fn check_constant_data_endianness_as_uimm128() { + fn parse_to_uimm128(from: &str) -> Vec { + from.parse::() + .unwrap() + .expand_to(16) + .into_vec() + } + + assert_eq!( + parse_to_uimm128("0x42"), + [0x42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ); + assert_eq!( + parse_to_uimm128("0x00"), + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ); + assert_eq!( + parse_to_uimm128("0x12345678"), + [0x78, 0x56, 0x34, 0x12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ); + assert_eq!( + parse_to_uimm128("0x1234_5678"), + [0x78, 0x56, 0x34, 0x12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ); + } + + #[test] + fn constant_ieee128() { + let value = Ieee128::with_bits(0x000102030405060708090a0b0c0d0e0f); + let constant = ConstantData::from(value); + assert_eq!( + constant.as_slice(), + &[0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9, 0x8, 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0x1, 0x0] + ); + assert_eq!(Ieee128::try_from(&constant).unwrap().bits(), value.bits()); + } +} diff --git a/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/dfg.rs b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/dfg.rs new file mode 100644 index 000000000..593d4cdb4 --- /dev/null +++ b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/dfg.rs @@ -0,0 +1,1798 @@ +//! Data flow graph tracking Instructions, Values, and blocks. + +use crate::entity::{self, PrimaryMap, SecondaryMap}; +use crate::ir; +use crate::ir::builder::ReplaceBuilder; +use crate::ir::dynamic_type::{DynamicTypeData, DynamicTypes}; +use crate::ir::instructions::{CallInfo, InstructionData}; +use crate::ir::pcc::Fact; +use crate::ir::user_stack_maps::{UserStackMapEntry, UserStackMapEntryVec}; +use crate::ir::{ + types, Block, BlockCall, ConstantData, ConstantPool, DynamicType, ExtFuncData, FuncRef, + Immediate, Inst, JumpTables, RelSourceLoc, SigRef, Signature, Type, Value, + ValueLabelAssignments, ValueList, ValueListPool, +}; +use crate::packed_option::ReservedValue; +use crate::write::write_operands; +use core::fmt; +use core::iter; +use core::mem; +use core::ops::{Index, IndexMut}; +use core::u16; + +use alloc::collections::BTreeMap; +#[cfg(feature = "enable-serde")] +use serde_derive::{Deserialize, Serialize}; +use smallvec::SmallVec; + +/// Storage for instructions within the DFG. 
+#[derive(Clone, PartialEq, Hash)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
+pub struct Insts(PrimaryMap<Inst, InstructionData>);
+
+/// Allow immutable access to instructions via indexing.
+impl Index<Inst> for Insts {
+    type Output = InstructionData;
+
+    fn index(&self, inst: Inst) -> &InstructionData {
+        self.0.index(inst)
+    }
+}
+
+/// Allow mutable access to instructions via indexing.
+impl IndexMut<Inst> for Insts {
+    fn index_mut(&mut self, inst: Inst) -> &mut InstructionData {
+        self.0.index_mut(inst)
+    }
+}
+
+/// Storage for basic blocks within the DFG.
+#[derive(Clone, PartialEq, Hash)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
+pub struct Blocks(PrimaryMap<Block, BlockData>);
+
+impl Blocks {
+    /// Create a new basic block.
+    pub fn add(&mut self) -> Block {
+        self.0.push(BlockData::new())
+    }
+
+    /// Get the total number of basic blocks created in this function, whether they are
+    /// currently inserted in the layout or not.
+    ///
+    /// This is intended for use with `SecondaryMap::with_capacity`.
+    pub fn len(&self) -> usize {
+        self.0.len()
+    }
+
+    /// Returns `true` if the given block reference is valid.
+    pub fn is_valid(&self, block: Block) -> bool {
+        self.0.is_valid(block)
+    }
+}
+
+impl Index<Block> for Blocks {
+    type Output = BlockData;
+
+    fn index(&self, block: Block) -> &BlockData {
+        &self.0[block]
+    }
+}
+
+impl IndexMut<Block> for Blocks {
+    fn index_mut(&mut self, block: Block) -> &mut BlockData {
+        &mut self.0[block]
+    }
+}
+
+/// A data flow graph defines all instructions and basic blocks in a function as well as
+/// the data flow dependencies between them. The DFG also tracks values which can be either
+/// instruction results or block parameters.
+///
+/// The layout of blocks in the function and of instructions in each block is recorded by the
+/// `Layout` data structure which forms the other half of the function representation.
+///
+#[derive(Clone, PartialEq, Hash)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
+pub struct DataFlowGraph {
+    /// Data about all of the instructions in the function, including opcodes and operands.
+    /// The instructions in this map are not in program order. That is tracked by `Layout`, along
+    /// with the block containing each instruction.
+    pub insts: Insts,
+
+    /// List of result values for each instruction.
+    ///
+    /// This map gets resized automatically by `make_inst()` so it is always in sync with the
+    /// primary `insts` map.
+    results: SecondaryMap<Inst, ValueList>,
+
+    /// User-defined stack maps.
+    ///
+    /// Not to be confused with the stack maps that `regalloc2` produces. These
+    /// are defined by the user in `cranelift-frontend`. These will eventually
+    /// replace the stack maps support in `regalloc2`, but in the name of
+    /// incrementalism and avoiding gigantic PRs that completely overhaul
+    /// Cranelift and Wasmtime at the same time, we are allowing them to live in
+    /// parallel for the time being.
+    user_stack_maps: alloc::collections::BTreeMap<Inst, UserStackMapEntryVec>,
+
+    /// Basic blocks in the function and their parameters.
+    ///
+    /// This map is not in program order. That is handled by `Layout`, and so is the sequence of
+    /// instructions contained in each block.
+    pub blocks: Blocks,
+
+    /// Dynamic types created.
+    pub dynamic_types: DynamicTypes,
+
+    /// Memory pool of value lists.
+    ///
+    /// The `ValueList` references into this pool appear in many places:
+    ///
+    /// - Instructions in `insts` that don't have room for their entire argument list inline.
+    /// - Instruction result values in `results`.
+ /// - block parameters in `blocks`. + pub value_lists: ValueListPool, + + /// Primary value table with entries for all values. + values: PrimaryMap, + + /// Facts: proof-carrying-code assertions about values. + pub facts: SecondaryMap>, + + /// Function signature table. These signatures are referenced by indirect call instructions as + /// well as the external function references. + pub signatures: PrimaryMap, + + /// External function references. These are functions that can be called directly. + pub ext_funcs: PrimaryMap, + + /// Saves Value labels. + pub values_labels: Option>, + + /// Constants used within the function. + pub constants: ConstantPool, + + /// Stores large immediates that otherwise will not fit on InstructionData. + pub immediates: PrimaryMap, + + /// Jump tables used in this function. + pub jump_tables: JumpTables, +} + +impl DataFlowGraph { + /// Create a new empty `DataFlowGraph`. + pub fn new() -> Self { + Self { + insts: Insts(PrimaryMap::new()), + results: SecondaryMap::new(), + user_stack_maps: alloc::collections::BTreeMap::new(), + blocks: Blocks(PrimaryMap::new()), + dynamic_types: DynamicTypes::new(), + value_lists: ValueListPool::new(), + values: PrimaryMap::new(), + facts: SecondaryMap::new(), + signatures: PrimaryMap::new(), + ext_funcs: PrimaryMap::new(), + values_labels: None, + constants: ConstantPool::new(), + immediates: PrimaryMap::new(), + jump_tables: JumpTables::new(), + } + } + + /// Clear everything. + pub fn clear(&mut self) { + self.insts.0.clear(); + self.results.clear(); + self.user_stack_maps.clear(); + self.blocks.0.clear(); + self.dynamic_types.clear(); + self.value_lists.clear(); + self.values.clear(); + self.signatures.clear(); + self.ext_funcs.clear(); + self.values_labels = None; + self.constants.clear(); + self.immediates.clear(); + self.jump_tables.clear(); + self.facts.clear(); + } + + /// Get the total number of instructions created in this function, whether they are currently + /// inserted in the layout or not. + /// + /// This is intended for use with `SecondaryMap::with_capacity`. + pub fn num_insts(&self) -> usize { + self.insts.0.len() + } + + /// Returns `true` if the given instruction reference is valid. + pub fn inst_is_valid(&self, inst: Inst) -> bool { + self.insts.0.is_valid(inst) + } + + /// Get the total number of basic blocks created in this function, whether they are + /// currently inserted in the layout or not. + /// + /// This is intended for use with `SecondaryMap::with_capacity`. + pub fn num_blocks(&self) -> usize { + self.blocks.len() + } + + /// Returns `true` if the given block reference is valid. + pub fn block_is_valid(&self, block: Block) -> bool { + self.blocks.is_valid(block) + } + + /// Make a BlockCall, bundling together the block and its arguments. + pub fn block_call(&mut self, block: Block, args: &[Value]) -> BlockCall { + BlockCall::new(block, args, &mut self.value_lists) + } + + /// Get the total number of values. + pub fn num_values(&self) -> usize { + self.values.len() + } + + /// Get an iterator over all values and their definitions. + pub fn values_and_defs(&self) -> impl Iterator + '_ { + self.values().map(|value| (value, self.value_def(value))) + } + + /// Starts collection of debug information. + pub fn collect_debug_info(&mut self) { + if self.values_labels.is_none() { + self.values_labels = Some(Default::default()); + } + } + + /// Inserts a `ValueLabelAssignments::Alias` for `to_alias` if debug info + /// collection is enabled. 
+ pub fn add_value_label_alias(&mut self, to_alias: Value, from: RelSourceLoc, value: Value) { + if let Some(values_labels) = self.values_labels.as_mut() { + values_labels.insert(to_alias, ir::ValueLabelAssignments::Alias { from, value }); + } + } +} + +/// Resolve value aliases. +/// +/// Find the original SSA value that `value` aliases, or None if an +/// alias cycle is detected. +fn maybe_resolve_aliases( + values: &PrimaryMap, + value: Value, +) -> Option { + let mut v = value; + + // Note that values may be empty here. + for _ in 0..=values.len() { + if let ValueData::Alias { original, .. } = ValueData::from(values[v]) { + v = original; + } else { + return Some(v); + } + } + + None +} + +/// Resolve value aliases. +/// +/// Find the original SSA value that `value` aliases. +fn resolve_aliases(values: &PrimaryMap, value: Value) -> Value { + if let Some(v) = maybe_resolve_aliases(values, value) { + v + } else { + panic!("Value alias loop detected for {value}"); + } +} + +/// Iterator over all Values in a DFG. +pub struct Values<'a> { + inner: entity::Iter<'a, Value, ValueDataPacked>, +} + +/// Check for non-values. +fn valid_valuedata(data: ValueDataPacked) -> bool { + let data = ValueData::from(data); + if let ValueData::Alias { + ty: types::INVALID, + original, + } = ValueData::from(data) + { + if original == Value::reserved_value() { + return false; + } + } + true +} + +impl<'a> Iterator for Values<'a> { + type Item = Value; + + fn next(&mut self) -> Option { + self.inner + .by_ref() + .find(|kv| valid_valuedata(*kv.1)) + .map(|kv| kv.0) + } +} + +/// Handling values. +/// +/// Values are either block parameters or instruction results. +impl DataFlowGraph { + /// Allocate an extended value entry. + fn make_value(&mut self, data: ValueData) -> Value { + self.values.push(data.into()) + } + + /// Get an iterator over all values. + pub fn values<'a>(&'a self) -> Values<'a> { + Values { + inner: self.values.iter(), + } + } + + /// Check if a value reference is valid. + pub fn value_is_valid(&self, v: Value) -> bool { + self.values.is_valid(v) + } + + /// Check whether a value is valid and not an alias. + pub fn value_is_real(&self, value: Value) -> bool { + // Deleted or unused values are also stored as aliases so this excludes + // those as well. + self.value_is_valid(value) && !matches!(self.values[value].into(), ValueData::Alias { .. }) + } + + /// Get the type of a value. + pub fn value_type(&self, v: Value) -> Type { + self.values[v].ty() + } + + /// Get the definition of a value. + /// + /// This is either the instruction that defined it or the Block that has the value as an + /// parameter. + pub fn value_def(&self, v: Value) -> ValueDef { + match ValueData::from(self.values[v]) { + ValueData::Inst { inst, num, .. } => ValueDef::Result(inst, num as usize), + ValueData::Param { block, num, .. } => ValueDef::Param(block, num as usize), + ValueData::Alias { original, .. } => { + // Make sure we only recurse one level. `resolve_aliases` has safeguards to + // detect alias loops without overrunning the stack. + self.value_def(self.resolve_aliases(original)) + } + ValueData::Union { x, y, .. } => ValueDef::Union(x, y), + } + } + + /// Determine if `v` is an attached instruction result / block parameter. + /// + /// An attached value can't be attached to something else without first being detached. + /// + /// Value aliases are not considered to be attached to anything. Use `resolve_aliases()` to + /// determine if the original aliased value is attached. 
+ pub fn value_is_attached(&self, v: Value) -> bool { + use self::ValueData::*; + match ValueData::from(self.values[v]) { + Inst { inst, num, .. } => Some(&v) == self.inst_results(inst).get(num as usize), + Param { block, num, .. } => Some(&v) == self.block_params(block).get(num as usize), + Alias { .. } => false, + Union { .. } => false, + } + } + + /// Resolve value aliases. + /// + /// Find the original SSA value that `value` aliases. + pub fn resolve_aliases(&self, value: Value) -> Value { + resolve_aliases(&self.values, value) + } + + /// Replace all uses of value aliases with their resolved values, and delete + /// the aliases. + pub fn resolve_all_aliases(&mut self) { + let invalid_value = ValueDataPacked::from(ValueData::Alias { + ty: types::INVALID, + original: Value::reserved_value(), + }); + + // Rewrite each chain of aliases. Update every alias along the chain + // into an alias directly to the final value. Due to updating every + // alias that it looks at, this loop runs in time linear in the number + // of values. + for mut src in self.values.keys() { + let value_data = self.values[src]; + if value_data == invalid_value { + continue; + } + if let ValueData::Alias { mut original, .. } = value_data.into() { + // We don't use the type after this, we just need some place to + // store the resolved aliases temporarily. + let resolved = ValueDataPacked::from(ValueData::Alias { + ty: types::INVALID, + original: resolve_aliases(&self.values, original), + }); + // Walk the chain again, splatting the new alias everywhere. + // resolve_aliases panics if there's an alias cycle, so we don't + // need to guard against cycles here. + loop { + self.values[src] = resolved; + src = original; + if let ValueData::Alias { original: next, .. } = self.values[src].into() { + original = next; + } else { + break; + } + } + } + } + + // Now aliases don't point to other aliases, so we can replace any use + // of an alias with the final value in constant time. + + // Rewrite InstructionData in `self.insts`. + for inst in self.insts.0.values_mut() { + inst.map_values(&mut self.value_lists, &mut self.jump_tables, |arg| { + if let ValueData::Alias { original, .. } = self.values[arg].into() { + original + } else { + arg + } + }); + } + + // - `results` and block-params in `blocks` are not aliases, by + // definition. + // - `dynamic_types` has no values. + // - `value_lists` can only be accessed via references from elsewhere. + // - `values` only has value references in aliases (which we've + // removed), and unions (but the egraph pass ensures there are no + // aliases before creating unions). + + // Merge `facts` from any alias onto the aliased value. Note that if + // there was a chain of aliases, at this point every alias that was in + // the chain points to the same final value, so their facts will all be + // merged together. + for value in self.facts.keys() { + if let ValueData::Alias { original, .. } = self.values[value].into() { + if let Some(new_fact) = self.facts[value].take() { + match &mut self.facts[original] { + Some(old_fact) => *old_fact = Fact::intersect(old_fact, &new_fact), + old_fact => *old_fact = Some(new_fact), + } + } + } + } + + // - `signatures` and `ext_funcs` have no values. + + if let Some(values_labels) = &mut self.values_labels { + // Debug info is best-effort. If any is attached to value aliases, + // just discard it. + values_labels.retain(|&k, _| !matches!(self.values[k].into(), ValueData::Alias { .. 
})); + + // If debug-info says a value should have the same labels as another + // value, then make sure that target is not a value alias. + for value_label in values_labels.values_mut() { + if let ValueLabelAssignments::Alias { value, .. } = value_label { + if let ValueData::Alias { original, .. } = self.values[*value].into() { + *value = original; + } + } + } + } + + // - `constants` and `immediates` have no values. + // - `jump_tables` is updated together with instruction-data above. + + // Delete all aliases now that there are no uses left. + for value in self.values.values_mut() { + if let ValueData::Alias { .. } = ValueData::from(*value) { + *value = invalid_value; + } + } + } + + /// Turn a value into an alias of another. + /// + /// Change the `dest` value to behave as an alias of `src`. This means that all uses of `dest` + /// will behave as if they used that value `src`. + /// + /// The `dest` value can't be attached to an instruction or block. + pub fn change_to_alias(&mut self, dest: Value, src: Value) { + debug_assert!(!self.value_is_attached(dest)); + // Try to create short alias chains by finding the original source value. + // This also avoids the creation of loops. + let original = self.resolve_aliases(src); + debug_assert_ne!( + dest, original, + "Aliasing {dest} to {src} would create a loop" + ); + let ty = self.value_type(original); + debug_assert_eq!( + self.value_type(dest), + ty, + "Aliasing {} to {} would change its type {} to {}", + dest, + src, + self.value_type(dest), + ty + ); + debug_assert_ne!(ty, types::INVALID); + + self.values[dest] = ValueData::Alias { ty, original }.into(); + } + + /// Replace the results of one instruction with aliases to the results of another. + /// + /// Change all the results of `dest_inst` to behave as aliases of + /// corresponding results of `src_inst`, as if calling change_to_alias for + /// each. + /// + /// After calling this instruction, `dest_inst` will have had its results + /// cleared, so it likely needs to be removed from the graph. + /// + pub fn replace_with_aliases(&mut self, dest_inst: Inst, original_inst: Inst) { + debug_assert_ne!( + dest_inst, original_inst, + "Replacing {dest_inst} with itself would create a loop" + ); + + let dest_results = self.results[dest_inst].as_slice(&self.value_lists); + let original_results = self.results[original_inst].as_slice(&self.value_lists); + + debug_assert_eq!( + dest_results.len(), + original_results.len(), + "Replacing {dest_inst} with {original_inst} would produce a different number of results." + ); + + for (&dest, &original) in dest_results.iter().zip(original_results) { + let ty = self.value_type(original); + debug_assert_eq!( + self.value_type(dest), + ty, + "Aliasing {} to {} would change its type {} to {}", + dest, + original, + self.value_type(dest), + ty + ); + debug_assert_ne!(ty, types::INVALID); + + self.values[dest] = ValueData::Alias { ty, original }.into(); + } + + self.clear_results(dest_inst); + } + + /// Get the stack map entries associated with the given instruction. + pub fn user_stack_map_entries(&self, inst: Inst) -> Option<&[UserStackMapEntry]> { + self.user_stack_maps.get(&inst).map(|es| &**es) + } + + /// Append a new stack map entry for the given call instruction. + /// + /// # Panics + /// + /// Panics if the given instruction is not a (non-tail) call instruction. 
+ pub fn append_user_stack_map_entry(&mut self, inst: Inst, entry: UserStackMapEntry) { + let opcode = self.insts[inst].opcode(); + assert!(opcode.is_safepoint()); + self.user_stack_maps.entry(inst).or_default().push(entry); + } +} + +/// Where did a value come from? +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum ValueDef { + /// Value is the n'th result of an instruction. + Result(Inst, usize), + /// Value is the n'th parameter to a block. + Param(Block, usize), + /// Value is a union of two other values. + Union(Value, Value), +} + +impl ValueDef { + /// Unwrap the instruction where the value was defined, or panic. + pub fn unwrap_inst(&self) -> Inst { + self.inst().expect("Value is not an instruction result") + } + + /// Get the instruction where the value was defined, if any. + pub fn inst(&self) -> Option { + match *self { + Self::Result(inst, _) => Some(inst), + _ => None, + } + } + + /// Unwrap the block there the parameter is defined, or panic. + pub fn unwrap_block(&self) -> Block { + match *self { + Self::Param(block, _) => block, + _ => panic!("Value is not a block parameter"), + } + } + + /// Get the number component of this definition. + /// + /// When multiple values are defined at the same program point, this indicates the index of + /// this value. + pub fn num(self) -> usize { + match self { + Self::Result(_, n) | Self::Param(_, n) => n, + Self::Union(_, _) => 0, + } + } +} + +/// Internal table storage for extended values. +#[derive(Clone, Debug, PartialEq, Hash)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +enum ValueData { + /// Value is defined by an instruction. + Inst { ty: Type, num: u16, inst: Inst }, + + /// Value is a block parameter. + Param { ty: Type, num: u16, block: Block }, + + /// Value is an alias of another value. + /// An alias value can't be linked as an instruction result or block parameter. It is used as a + /// placeholder when the original instruction or block has been rewritten or modified. + Alias { ty: Type, original: Value }, + + /// Union is a "fork" in representation: the value can be + /// represented as either of the values named here. This is used + /// for aegraph (acyclic egraph) representation in the DFG. + Union { ty: Type, x: Value, y: Value }, +} + +/// Bit-packed version of ValueData, for efficiency. +/// +/// Layout: +/// +/// ```plain +/// | tag:2 | type:14 | x:24 | y:24 | +/// +/// Inst 00 ty inst output inst index +/// Param 01 ty blockparam num block index +/// Alias 10 ty 0 value index +/// Union 11 ty first value second value +/// ``` +#[derive(Clone, Copy, Debug, PartialEq, Hash)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +struct ValueDataPacked(u64); + +/// Encodes a value in 0..2^32 into 0..2^n, where n is less than 32 +/// (and is implied by `mask`), by translating 2^32-1 (0xffffffff) +/// into 2^n-1 and panic'ing on 2^n..2^32-1. +fn encode_narrow_field(x: u32, bits: u8) -> u32 { + let max = (1 << bits) - 1; + if x == 0xffff_ffff { + max + } else { + debug_assert!( + x < max, + "{x} does not fit into {bits} bits (must be less than {max} to \ + allow for a 0xffffffff sentinel)" + ); + x + } +} + +/// The inverse of the above `encode_narrow_field`: unpacks 2^n-1 into +/// 2^32-1. 
+fn decode_narrow_field(x: u32, bits: u8) -> u32 { + if x == (1 << bits) - 1 { + 0xffff_ffff + } else { + x + } +} + +impl ValueDataPacked { + const Y_SHIFT: u8 = 0; + const Y_BITS: u8 = 24; + const X_SHIFT: u8 = Self::Y_SHIFT + Self::Y_BITS; + const X_BITS: u8 = 24; + const TYPE_SHIFT: u8 = Self::X_SHIFT + Self::X_BITS; + const TYPE_BITS: u8 = 14; + const TAG_SHIFT: u8 = Self::TYPE_SHIFT + Self::TYPE_BITS; + const TAG_BITS: u8 = 2; + + const TAG_INST: u64 = 0; + const TAG_PARAM: u64 = 1; + const TAG_ALIAS: u64 = 2; + const TAG_UNION: u64 = 3; + + fn make(tag: u64, ty: Type, x: u32, y: u32) -> ValueDataPacked { + debug_assert!(tag < (1 << Self::TAG_BITS)); + debug_assert!(ty.repr() < (1 << Self::TYPE_BITS)); + + let x = encode_narrow_field(x, Self::X_BITS); + let y = encode_narrow_field(y, Self::Y_BITS); + + ValueDataPacked( + (tag << Self::TAG_SHIFT) + | ((ty.repr() as u64) << Self::TYPE_SHIFT) + | ((x as u64) << Self::X_SHIFT) + | ((y as u64) << Self::Y_SHIFT), + ) + } + + #[inline(always)] + fn field(self, shift: u8, bits: u8) -> u64 { + (self.0 >> shift) & ((1 << bits) - 1) + } + + #[inline(always)] + fn ty(self) -> Type { + let ty = self.field(ValueDataPacked::TYPE_SHIFT, ValueDataPacked::TYPE_BITS) as u16; + Type::from_repr(ty) + } + + #[inline(always)] + fn set_type(&mut self, ty: Type) { + self.0 &= !(((1 << Self::TYPE_BITS) - 1) << Self::TYPE_SHIFT); + self.0 |= (ty.repr() as u64) << Self::TYPE_SHIFT; + } +} + +impl From for ValueDataPacked { + fn from(data: ValueData) -> Self { + match data { + ValueData::Inst { ty, num, inst } => { + Self::make(Self::TAG_INST, ty, num.into(), inst.as_bits()) + } + ValueData::Param { ty, num, block } => { + Self::make(Self::TAG_PARAM, ty, num.into(), block.as_bits()) + } + ValueData::Alias { ty, original } => { + Self::make(Self::TAG_ALIAS, ty, 0, original.as_bits()) + } + ValueData::Union { ty, x, y } => { + Self::make(Self::TAG_UNION, ty, x.as_bits(), y.as_bits()) + } + } + } +} + +impl From for ValueData { + fn from(data: ValueDataPacked) -> Self { + let tag = data.field(ValueDataPacked::TAG_SHIFT, ValueDataPacked::TAG_BITS); + let ty = u16::try_from(data.field(ValueDataPacked::TYPE_SHIFT, ValueDataPacked::TYPE_BITS)) + .expect("Mask should ensure result fits in a u16"); + let x = u32::try_from(data.field(ValueDataPacked::X_SHIFT, ValueDataPacked::X_BITS)) + .expect("Mask should ensure result fits in a u32"); + let y = u32::try_from(data.field(ValueDataPacked::Y_SHIFT, ValueDataPacked::Y_BITS)) + .expect("Mask should ensure result fits in a u32"); + + let ty = Type::from_repr(ty); + match tag { + ValueDataPacked::TAG_INST => ValueData::Inst { + ty, + num: u16::try_from(x).expect("Inst result num should fit in u16"), + inst: Inst::from_bits(decode_narrow_field(y, ValueDataPacked::Y_BITS)), + }, + ValueDataPacked::TAG_PARAM => ValueData::Param { + ty, + num: u16::try_from(x).expect("Blockparam index should fit in u16"), + block: Block::from_bits(decode_narrow_field(y, ValueDataPacked::Y_BITS)), + }, + ValueDataPacked::TAG_ALIAS => ValueData::Alias { + ty, + original: Value::from_bits(decode_narrow_field(y, ValueDataPacked::Y_BITS)), + }, + ValueDataPacked::TAG_UNION => ValueData::Union { + ty, + x: Value::from_bits(decode_narrow_field(x, ValueDataPacked::X_BITS)), + y: Value::from_bits(decode_narrow_field(y, ValueDataPacked::Y_BITS)), + }, + _ => panic!("Invalid tag {} in ValueDataPacked 0x{:x}", tag, data.0), + } + } +} + +/// Instructions. +/// +impl DataFlowGraph { + /// Create a new instruction. 
+ /// + /// The type of the first result is indicated by `data.ty`. If the + /// instruction produces multiple results, also call + /// `make_inst_results` to allocate value table entries. (It is + /// always safe to call `make_inst_results`, regardless of how + /// many results the instruction has.) + pub fn make_inst(&mut self, data: InstructionData) -> Inst { + let n = self.num_insts() + 1; + self.results.resize(n); + self.insts.0.push(data) + } + + /// Declares a dynamic vector type + pub fn make_dynamic_ty(&mut self, data: DynamicTypeData) -> DynamicType { + self.dynamic_types.push(data) + } + + /// Returns an object that displays `inst`. + pub fn display_inst<'a>(&'a self, inst: Inst) -> DisplayInst<'a> { + DisplayInst(self, inst) + } + + /// Returns an object that displays the given `value`'s defining instruction. + /// + /// Panics if the value is not defined by an instruction (i.e. it is a basic + /// block argument). + pub fn display_value_inst(&self, value: Value) -> DisplayInst<'_> { + match self.value_def(value) { + ir::ValueDef::Result(inst, _) => self.display_inst(inst), + ir::ValueDef::Param(_, _) => panic!("value is not defined by an instruction"), + ir::ValueDef::Union(_, _) => panic!("value is a union of two other values"), + } + } + + /// Construct a read-only visitor context for the values of this instruction. + pub fn inst_values<'dfg>( + &'dfg self, + inst: Inst, + ) -> impl DoubleEndedIterator + 'dfg { + self.inst_args(inst) + .iter() + .chain( + self.insts[inst] + .branch_destination(&self.jump_tables) + .into_iter() + .flat_map(|branch| branch.args_slice(&self.value_lists).iter()), + ) + .copied() + } + + /// Map a function over the values of the instruction. + pub fn map_inst_values(&mut self, inst: Inst, body: F) + where + F: FnMut(Value) -> Value, + { + self.insts[inst].map_values(&mut self.value_lists, &mut self.jump_tables, body); + } + + /// Overwrite the instruction's value references with values from the iterator. + /// NOTE: the iterator provided is expected to yield at least as many values as the instruction + /// currently has. + pub fn overwrite_inst_values(&mut self, inst: Inst, mut values: I) + where + I: Iterator, + { + self.insts[inst].map_values(&mut self.value_lists, &mut self.jump_tables, |_| { + values.next().unwrap() + }); + } + + /// Get all value arguments on `inst` as a slice. + pub fn inst_args(&self, inst: Inst) -> &[Value] { + self.insts[inst].arguments(&self.value_lists) + } + + /// Get all value arguments on `inst` as a mutable slice. + pub fn inst_args_mut(&mut self, inst: Inst) -> &mut [Value] { + self.insts[inst].arguments_mut(&mut self.value_lists) + } + + /// Get the fixed value arguments on `inst` as a slice. + pub fn inst_fixed_args(&self, inst: Inst) -> &[Value] { + let num_fixed_args = self.insts[inst] + .opcode() + .constraints() + .num_fixed_value_arguments(); + &self.inst_args(inst)[..num_fixed_args] + } + + /// Get the fixed value arguments on `inst` as a mutable slice. + pub fn inst_fixed_args_mut(&mut self, inst: Inst) -> &mut [Value] { + let num_fixed_args = self.insts[inst] + .opcode() + .constraints() + .num_fixed_value_arguments(); + &mut self.inst_args_mut(inst)[..num_fixed_args] + } + + /// Get the variable value arguments on `inst` as a slice. + pub fn inst_variable_args(&self, inst: Inst) -> &[Value] { + let num_fixed_args = self.insts[inst] + .opcode() + .constraints() + .num_fixed_value_arguments(); + &self.inst_args(inst)[num_fixed_args..] 
+ } + + /// Get the variable value arguments on `inst` as a mutable slice. + pub fn inst_variable_args_mut(&mut self, inst: Inst) -> &mut [Value] { + let num_fixed_args = self.insts[inst] + .opcode() + .constraints() + .num_fixed_value_arguments(); + &mut self.inst_args_mut(inst)[num_fixed_args..] + } + + /// Create result values for an instruction that produces multiple results. + /// + /// Instructions that produce no result values only need to be created with `make_inst`, + /// otherwise call `make_inst_results` to allocate value table entries for the results. + /// + /// The result value types are determined from the instruction's value type constraints and the + /// provided `ctrl_typevar` type for polymorphic instructions. For non-polymorphic + /// instructions, `ctrl_typevar` is ignored, and `INVALID` can be used. + /// + /// The type of the first result value is also set, even if it was already set in the + /// `InstructionData` passed to `make_inst`. If this function is called with a single-result + /// instruction, that is the only effect. + pub fn make_inst_results(&mut self, inst: Inst, ctrl_typevar: Type) -> usize { + self.make_inst_results_reusing(inst, ctrl_typevar, iter::empty()) + } + + /// Create result values for `inst`, reusing the provided detached values. + /// + /// Create a new set of result values for `inst` using `ctrl_typevar` to determine the result + /// types. Any values provided by `reuse` will be reused. When `reuse` is exhausted or when it + /// produces `None`, a new value is created. + pub fn make_inst_results_reusing( + &mut self, + inst: Inst, + ctrl_typevar: Type, + reuse: I, + ) -> usize + where + I: Iterator>, + { + self.clear_results(inst); + + let mut reuse = reuse.fuse(); + let result_tys: SmallVec<[_; 16]> = self.inst_result_types(inst, ctrl_typevar).collect(); + + for (expected, &ty) in result_tys.iter().enumerate() { + let num = u16::try_from(expected).expect("Result value index should fit in u16"); + let value_data = ValueData::Inst { ty, num, inst }; + let v = if let Some(Some(v)) = reuse.next() { + debug_assert_eq!(self.value_type(v), ty, "Reused {ty} is wrong type"); + debug_assert!(!self.value_is_attached(v)); + self.values[v] = value_data.into(); + v + } else { + self.make_value(value_data) + }; + let actual = self.results[inst].push(v, &mut self.value_lists); + debug_assert_eq!(expected, actual); + } + + result_tys.len() + } + + /// Create a `ReplaceBuilder` that will replace `inst` with a new instruction in place. + pub fn replace(&mut self, inst: Inst) -> ReplaceBuilder { + ReplaceBuilder::new(self, inst) + } + + /// Clear the list of result values from `inst`. + /// + /// This leaves `inst` without any result values. New result values can be created by calling + /// `make_inst_results` or by using a `replace(inst)` builder. + pub fn clear_results(&mut self, inst: Inst) { + self.results[inst].clear(&mut self.value_lists) + } + + /// Replace an instruction result with a new value of type `new_type`. + /// + /// The `old_value` must be an attached instruction result. + /// + /// The old value is left detached, so it should probably be changed into something else. + /// + /// Returns the new value. + pub fn replace_result(&mut self, old_value: Value, new_type: Type) -> Value { + let (num, inst) = match ValueData::from(self.values[old_value]) { + ValueData::Inst { num, inst, .. 
} => (num, inst), + _ => panic!("{old_value} is not an instruction result value"), + }; + let new_value = self.make_value(ValueData::Inst { + ty: new_type, + num, + inst, + }); + let num = num as usize; + let attached = mem::replace( + self.results[inst] + .get_mut(num, &mut self.value_lists) + .expect("Replacing detached result"), + new_value, + ); + debug_assert_eq!( + attached, + old_value, + "{} wasn't detached from {}", + old_value, + self.display_inst(inst) + ); + new_value + } + + /// Clone an instruction, attaching new result `Value`s and + /// returning them. + pub fn clone_inst(&mut self, inst: Inst) -> Inst { + // First, add a clone of the InstructionData. + let inst_data = self.insts[inst]; + // If the `inst_data` has a reference to a ValueList, clone it + // as well, because we can't share these (otherwise mutating + // one would affect the other). + let inst_data = inst_data.deep_clone(&mut self.value_lists); + let new_inst = self.make_inst(inst_data); + // Get the controlling type variable. + let ctrl_typevar = self.ctrl_typevar(inst); + // Create new result values. + let num_results = self.make_inst_results(new_inst, ctrl_typevar); + // Copy over PCC facts, if any. + for i in 0..num_results { + let old_result = self.inst_results(inst)[i]; + let new_result = self.inst_results(new_inst)[i]; + self.facts[new_result] = self.facts[old_result].clone(); + } + new_inst + } + + /// Get the first result of an instruction. + /// + /// This function panics if the instruction doesn't have any result. + pub fn first_result(&self, inst: Inst) -> Value { + self.results[inst] + .first(&self.value_lists) + .expect("Instruction has no results") + } + + /// Test if `inst` has any result values currently. + pub fn has_results(&self, inst: Inst) -> bool { + !self.results[inst].is_empty() + } + + /// Return all the results of an instruction. + pub fn inst_results(&self, inst: Inst) -> &[Value] { + self.results[inst].as_slice(&self.value_lists) + } + + /// Return all the results of an instruction as ValueList. + pub fn inst_results_list(&self, inst: Inst) -> ValueList { + self.results[inst] + } + + /// Create a union of two values. + pub fn union(&mut self, x: Value, y: Value) -> Value { + // Get the type. + let ty = self.value_type(x); + debug_assert_eq!(ty, self.value_type(y)); + self.make_value(ValueData::Union { ty, x, y }) + } + + /// Get the call signature of a direct or indirect call instruction. + /// Returns `None` if `inst` is not a call instruction. + pub fn call_signature(&self, inst: Inst) -> Option { + match self.insts[inst].analyze_call(&self.value_lists) { + CallInfo::NotACall => None, + CallInfo::Direct(f, _) => Some(self.ext_funcs[f].signature), + CallInfo::Indirect(s, _) => Some(s), + } + } + + /// Like `call_signature` but returns none for tail call instructions. + fn non_tail_call_signature(&self, inst: Inst) -> Option { + let sig = self.call_signature(inst)?; + match self.insts[inst].opcode() { + ir::Opcode::ReturnCall | ir::Opcode::ReturnCallIndirect => None, + _ => Some(sig), + } + } + + // Only for use by the verifier. Everyone else should just use + // `dfg.inst_results(inst).len()`. + pub(crate) fn num_expected_results_for_verifier(&self, inst: Inst) -> usize { + match self.non_tail_call_signature(inst) { + Some(sig) => self.signatures[sig].returns.len(), + None => { + let constraints = self.insts[inst].opcode().constraints(); + constraints.num_fixed_results() + } + } + } + + /// Get the result types of the given instruction. 
+ pub fn inst_result_types<'a>( + &'a self, + inst: Inst, + ctrl_typevar: Type, + ) -> impl iter::ExactSizeIterator + 'a { + return match self.non_tail_call_signature(inst) { + Some(sig) => InstResultTypes::Signature(self, sig, 0), + None => { + let constraints = self.insts[inst].opcode().constraints(); + InstResultTypes::Constraints(constraints, ctrl_typevar, 0) + } + }; + + enum InstResultTypes<'a> { + Signature(&'a DataFlowGraph, SigRef, usize), + Constraints(ir::instructions::OpcodeConstraints, Type, usize), + } + + impl Iterator for InstResultTypes<'_> { + type Item = Type; + + fn next(&mut self) -> Option { + match self { + InstResultTypes::Signature(dfg, sig, i) => { + let param = dfg.signatures[*sig].returns.get(*i)?; + *i += 1; + Some(param.value_type) + } + InstResultTypes::Constraints(constraints, ctrl_ty, i) => { + if *i < constraints.num_fixed_results() { + let ty = constraints.result_type(*i, *ctrl_ty); + *i += 1; + Some(ty) + } else { + None + } + } + } + } + + fn size_hint(&self) -> (usize, Option) { + let len = match self { + InstResultTypes::Signature(dfg, sig, i) => { + dfg.signatures[*sig].returns.len() - *i + } + InstResultTypes::Constraints(constraints, _, i) => { + constraints.num_fixed_results() - *i + } + }; + (len, Some(len)) + } + } + + impl ExactSizeIterator for InstResultTypes<'_> {} + } + + /// Compute the type of an instruction result from opcode constraints and call signatures. + /// + /// This computes the same sequence of result types that `make_inst_results()` above would + /// assign to the created result values, but it does not depend on `make_inst_results()` being + /// called first. + /// + /// Returns `None` if asked about a result index that is too large. + pub fn compute_result_type( + &self, + inst: Inst, + result_idx: usize, + ctrl_typevar: Type, + ) -> Option { + self.inst_result_types(inst, ctrl_typevar).nth(result_idx) + } + + /// Get the controlling type variable, or `INVALID` if `inst` isn't polymorphic. + pub fn ctrl_typevar(&self, inst: Inst) -> Type { + let constraints = self.insts[inst].opcode().constraints(); + + if !constraints.is_polymorphic() { + types::INVALID + } else if constraints.requires_typevar_operand() { + // Not all instruction formats have a designated operand, but in that case + // `requires_typevar_operand()` should never be true. + self.value_type( + self.insts[inst] + .typevar_operand(&self.value_lists) + .unwrap_or_else(|| { + panic!( + "Instruction format for {:?} doesn't have a designated operand", + self.insts[inst] + ) + }), + ) + } else { + self.value_type(self.first_result(inst)) + } + } +} + +/// basic blocks. +impl DataFlowGraph { + /// Create a new basic block. + pub fn make_block(&mut self) -> Block { + self.blocks.add() + } + + /// Get the number of parameters on `block`. + pub fn num_block_params(&self, block: Block) -> usize { + self.blocks[block].params(&self.value_lists).len() + } + + /// Get the parameters on `block`. + pub fn block_params(&self, block: Block) -> &[Value] { + self.blocks[block].params(&self.value_lists) + } + + /// Get the types of the parameters on `block`. + pub fn block_param_types(&self, block: Block) -> impl Iterator + '_ { + self.block_params(block).iter().map(|&v| self.value_type(v)) + } + + /// Append a parameter with type `ty` to `block`. 
+ pub fn append_block_param(&mut self, block: Block, ty: Type) -> Value { + let param = self.values.next_key(); + let num = self.blocks[block].params.push(param, &mut self.value_lists); + debug_assert!(num <= u16::MAX as usize, "Too many parameters on block"); + self.make_value(ValueData::Param { + ty, + num: num as u16, + block, + }) + } + + /// Removes `val` from `block`'s parameters by swapping it with the last parameter on `block`. + /// Returns the position of `val` before removal. + /// + /// *Important*: to ensure O(1) deletion, this method swaps the removed parameter with the + /// last `block` parameter. This can disrupt all the branch instructions jumping to this + /// `block` for which you have to change the branch argument order if necessary. + /// + /// Panics if `val` is not a block parameter. + pub fn swap_remove_block_param(&mut self, val: Value) -> usize { + let (block, num) = + if let ValueData::Param { num, block, .. } = ValueData::from(self.values[val]) { + (block, num) + } else { + panic!("{val} must be a block parameter"); + }; + self.blocks[block] + .params + .swap_remove(num as usize, &mut self.value_lists); + if let Some(last_arg_val) = self.blocks[block] + .params + .get(num as usize, &self.value_lists) + { + // We update the position of the old last arg. + let mut last_arg_data = ValueData::from(self.values[last_arg_val]); + if let ValueData::Param { num: old_num, .. } = &mut last_arg_data { + *old_num = num; + self.values[last_arg_val] = last_arg_data.into(); + } else { + panic!("{last_arg_val} should be a Block parameter"); + } + } + num as usize + } + + /// Removes `val` from `block`'s parameters by a standard linear time list removal which + /// preserves ordering. Also updates the values' data. + pub fn remove_block_param(&mut self, val: Value) { + let (block, num) = + if let ValueData::Param { num, block, .. } = ValueData::from(self.values[val]) { + (block, num) + } else { + panic!("{val} must be a block parameter"); + }; + self.blocks[block] + .params + .remove(num as usize, &mut self.value_lists); + for index in num..(self.num_block_params(block) as u16) { + let packed = &mut self.values[self.blocks[block] + .params + .get(index as usize, &self.value_lists) + .unwrap()]; + let mut data = ValueData::from(*packed); + match &mut data { + ValueData::Param { num, .. } => { + *num -= 1; + *packed = data.into(); + } + _ => panic!( + "{} must be a block parameter", + self.blocks[block] + .params + .get(index as usize, &self.value_lists) + .unwrap() + ), + } + } + } + + /// Append an existing value to `block`'s parameters. + /// + /// The appended value can't already be attached to something else. + /// + /// In almost all cases, you should be using `append_block_param()` instead of this method. + pub fn attach_block_param(&mut self, block: Block, param: Value) { + debug_assert!(!self.value_is_attached(param)); + let num = self.blocks[block].params.push(param, &mut self.value_lists); + debug_assert!(num <= u16::MAX as usize, "Too many parameters on block"); + let ty = self.value_type(param); + self.values[param] = ValueData::Param { + ty, + num: num as u16, + block, + } + .into(); + } + + /// Replace a block parameter with a new value of type `ty`. + /// + /// The `old_value` must be an attached block parameter. It is removed from its place in the list + /// of parameters and replaced by a new value of type `new_type`. The new value gets the same + /// position in the list, and other parameters are not disturbed. 
+ /// + /// The old value is left detached, so it should probably be changed into something else. + /// + /// Returns the new value. + pub fn replace_block_param(&mut self, old_value: Value, new_type: Type) -> Value { + // Create new value identical to the old one except for the type. + let (block, num) = + if let ValueData::Param { num, block, .. } = ValueData::from(self.values[old_value]) { + (block, num) + } else { + panic!("{old_value} must be a block parameter"); + }; + let new_arg = self.make_value(ValueData::Param { + ty: new_type, + num, + block, + }); + + self.blocks[block] + .params + .as_mut_slice(&mut self.value_lists)[num as usize] = new_arg; + new_arg + } + + /// Detach all the parameters from `block` and return them as a `ValueList`. + /// + /// This is a quite low-level operation. Sensible things to do with the detached block parameters + /// is to put them back on the same block with `attach_block_param()` or change them into aliases + /// with `change_to_alias()`. + pub fn detach_block_params(&mut self, block: Block) -> ValueList { + self.blocks[block].params.take() + } + + /// Merge the facts for two values. If both values have facts and + /// they differ, both values get a special "conflict" fact that is + /// never satisfied. + pub fn merge_facts(&mut self, a: Value, b: Value) { + let a = self.resolve_aliases(a); + let b = self.resolve_aliases(b); + match (&self.facts[a], &self.facts[b]) { + (Some(a), Some(b)) if a == b => { /* nothing */ } + (None, None) => { /* nothing */ } + (Some(a), None) => { + self.facts[b] = Some(a.clone()); + } + (None, Some(b)) => { + self.facts[a] = Some(b.clone()); + } + (Some(a_fact), Some(b_fact)) => { + assert_eq!(self.value_type(a), self.value_type(b)); + let merged = Fact::intersect(a_fact, b_fact); + crate::trace!( + "facts merge on {} and {}: {:?}, {:?} -> {:?}", + a, + b, + a_fact, + b_fact, + merged, + ); + self.facts[a] = Some(merged.clone()); + self.facts[b] = Some(merged); + } + } + } +} + +/// Contents of a basic block. +/// +/// Parameters on a basic block are values that dominate everything in the block. All +/// branches to this block must provide matching arguments, and the arguments to the entry block must +/// match the function arguments. +#[derive(Clone, PartialEq, Hash)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub struct BlockData { + /// List of parameters to this block. + params: ValueList, +} + +impl BlockData { + fn new() -> Self { + Self { + params: ValueList::new(), + } + } + + /// Get the parameters on `block`. + pub fn params<'a>(&self, pool: &'a ValueListPool) -> &'a [Value] { + self.params.as_slice(pool) + } +} + +/// Object that can display an instruction. +pub struct DisplayInst<'a>(&'a DataFlowGraph, Inst); + +impl<'a> fmt::Display for DisplayInst<'a> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let dfg = self.0; + let inst = self.1; + + if let Some((first, rest)) = dfg.inst_results(inst).split_first() { + write!(f, "{first}")?; + for v in rest { + write!(f, ", {v}")?; + } + write!(f, " = ")?; + } + + let typevar = dfg.ctrl_typevar(inst); + if typevar.is_invalid() { + write!(f, "{}", dfg.insts[inst].opcode())?; + } else { + write!(f, "{}.{}", dfg.insts[inst].opcode(), typevar)?; + } + write_operands(f, dfg, inst) + } +} + +/// Parser routines. These routines should not be used outside the parser. +impl DataFlowGraph { + /// Set the type of a value. 
This is only for use in the parser, which needs + /// to create invalid values for index padding which may be reassigned later. + #[cold] + fn set_value_type_for_parser(&mut self, v: Value, t: Type) { + assert_eq!( + self.value_type(v), + types::INVALID, + "this function is only for assigning types to previously invalid values" + ); + self.values[v].set_type(t); + } + + /// Check that the given concrete `Type` has been defined in the function. + pub fn check_dynamic_type(&mut self, ty: Type) -> Option { + debug_assert!(ty.is_dynamic_vector()); + if self + .dynamic_types + .values() + .any(|dyn_ty_data| dyn_ty_data.concrete().unwrap() == ty) + { + Some(ty) + } else { + None + } + } + + /// Create result values for `inst`, reusing the provided detached values. + /// This is similar to `make_inst_results_reusing` except it's only for use + /// in the parser, which needs to reuse previously invalid values. + #[cold] + pub fn make_inst_results_for_parser( + &mut self, + inst: Inst, + ctrl_typevar: Type, + reuse: &[Value], + ) -> usize { + let mut reuse_iter = reuse.iter().copied(); + let result_tys: SmallVec<[_; 16]> = self.inst_result_types(inst, ctrl_typevar).collect(); + for ty in result_tys { + if ty.is_dynamic_vector() { + self.check_dynamic_type(ty) + .unwrap_or_else(|| panic!("Use of undeclared dynamic type: {ty}")); + } + if let Some(v) = reuse_iter.next() { + self.set_value_type_for_parser(v, ty); + } + } + + self.make_inst_results_reusing(inst, ctrl_typevar, reuse.iter().map(|x| Some(*x))) + } + + /// Similar to `append_block_param`, append a parameter with type `ty` to + /// `block`, but using value `val`. This is only for use by the parser to + /// create parameters with specific values. + #[cold] + pub fn append_block_param_for_parser(&mut self, block: Block, ty: Type, val: Value) { + let num = self.blocks[block].params.push(val, &mut self.value_lists); + assert!(num <= u16::MAX as usize, "Too many parameters on block"); + self.values[val] = ValueData::Param { + ty, + num: num as u16, + block, + } + .into(); + } + + /// Create a new value alias. This is only for use by the parser to create + /// aliases with specific values, and the printer for testing. + #[cold] + pub fn make_value_alias_for_serialization(&mut self, src: Value, dest: Value) { + assert_ne!(src, Value::reserved_value()); + assert_ne!(dest, Value::reserved_value()); + + let ty = if self.values.is_valid(src) { + self.value_type(src) + } else { + // As a special case, if we can't resolve the aliasee yet, use INVALID + // temporarily. It will be resolved later in parsing. + types::INVALID + }; + let data = ValueData::Alias { ty, original: src }; + self.values[dest] = data.into(); + } + + /// If `v` is already defined as an alias, return its destination value. + /// Otherwise return None. This allows the parser to coalesce identical + /// alias definitions, and the printer to identify an alias's immediate target. + #[cold] + pub fn value_alias_dest_for_serialization(&self, v: Value) -> Option { + if let ValueData::Alias { original, .. } = ValueData::from(self.values[v]) { + Some(original) + } else { + None + } + } + + /// Compute the type of an alias. This is only for use in the parser. + /// Returns false if an alias cycle was encountered. 
+ #[cold] + pub fn set_alias_type_for_parser(&mut self, v: Value) -> bool { + if let Some(resolved) = maybe_resolve_aliases(&self.values, v) { + let old_ty = self.value_type(v); + let new_ty = self.value_type(resolved); + if old_ty == types::INVALID { + self.set_value_type_for_parser(v, new_ty); + } else { + assert_eq!(old_ty, new_ty); + } + true + } else { + false + } + } + + /// Create an invalid value, to pad the index space. This is only for use by + /// the parser to pad out the value index space. + #[cold] + pub fn make_invalid_value_for_parser(&mut self) { + let data = ValueData::Alias { + ty: types::INVALID, + original: Value::reserved_value(), + }; + self.make_value(data); + } + + /// Check if a value reference is valid, while being aware of aliases which + /// may be unresolved while parsing. + #[cold] + pub fn value_is_valid_for_parser(&self, v: Value) -> bool { + if !self.value_is_valid(v) { + return false; + } + if let ValueData::Alias { ty, .. } = ValueData::from(self.values[v]) { + ty != types::INVALID + } else { + true + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::cursor::{Cursor, FuncCursor}; + use crate::ir::{Function, Opcode, TrapCode}; + use alloc::string::ToString; + + #[test] + fn make_inst() { + let mut dfg = DataFlowGraph::new(); + + let idata = InstructionData::UnaryImm { + opcode: Opcode::Iconst, + imm: 0.into(), + }; + let inst = dfg.make_inst(idata); + + dfg.make_inst_results(inst, types::I32); + assert_eq!(inst.to_string(), "inst0"); + assert_eq!(dfg.display_inst(inst).to_string(), "v0 = iconst.i32 0"); + + // Immutable reference resolution. + { + let immdfg = &dfg; + let ins = &immdfg.insts[inst]; + assert_eq!(ins.opcode(), Opcode::Iconst); + } + + // Results. + let val = dfg.first_result(inst); + assert_eq!(dfg.inst_results(inst), &[val]); + + assert_eq!(dfg.value_def(val), ValueDef::Result(inst, 0)); + assert_eq!(dfg.value_type(val), types::I32); + + // Replacing results. + assert!(dfg.value_is_attached(val)); + let v2 = dfg.replace_result(val, types::F64); + assert!(!dfg.value_is_attached(val)); + assert!(dfg.value_is_attached(v2)); + assert_eq!(dfg.inst_results(inst), &[v2]); + assert_eq!(dfg.value_def(v2), ValueDef::Result(inst, 0)); + assert_eq!(dfg.value_type(v2), types::F64); + } + + #[test] + fn no_results() { + let mut dfg = DataFlowGraph::new(); + + let idata = InstructionData::Trap { + opcode: Opcode::Trap, + code: TrapCode::unwrap_user(1), + }; + let inst = dfg.make_inst(idata); + assert_eq!(dfg.display_inst(inst).to_string(), "trap user1"); + + // Result slice should be empty. 
+ assert_eq!(dfg.inst_results(inst), &[]); + } + + #[test] + fn block() { + let mut dfg = DataFlowGraph::new(); + + let block = dfg.make_block(); + assert_eq!(block.to_string(), "block0"); + assert_eq!(dfg.num_block_params(block), 0); + assert_eq!(dfg.block_params(block), &[]); + assert!(dfg.detach_block_params(block).is_empty()); + assert_eq!(dfg.num_block_params(block), 0); + assert_eq!(dfg.block_params(block), &[]); + + let arg1 = dfg.append_block_param(block, types::F32); + assert_eq!(arg1.to_string(), "v0"); + assert_eq!(dfg.num_block_params(block), 1); + assert_eq!(dfg.block_params(block), &[arg1]); + + let arg2 = dfg.append_block_param(block, types::I16); + assert_eq!(arg2.to_string(), "v1"); + assert_eq!(dfg.num_block_params(block), 2); + assert_eq!(dfg.block_params(block), &[arg1, arg2]); + + assert_eq!(dfg.value_def(arg1), ValueDef::Param(block, 0)); + assert_eq!(dfg.value_def(arg2), ValueDef::Param(block, 1)); + assert_eq!(dfg.value_type(arg1), types::F32); + assert_eq!(dfg.value_type(arg2), types::I16); + + // Swap the two block parameters. + let vlist = dfg.detach_block_params(block); + assert_eq!(dfg.num_block_params(block), 0); + assert_eq!(dfg.block_params(block), &[]); + assert_eq!(vlist.as_slice(&dfg.value_lists), &[arg1, arg2]); + dfg.attach_block_param(block, arg2); + let arg3 = dfg.append_block_param(block, types::I32); + dfg.attach_block_param(block, arg1); + assert_eq!(dfg.block_params(block), &[arg2, arg3, arg1]); + } + + #[test] + fn replace_block_params() { + let mut dfg = DataFlowGraph::new(); + + let block = dfg.make_block(); + let arg1 = dfg.append_block_param(block, types::F32); + + let new1 = dfg.replace_block_param(arg1, types::I64); + assert_eq!(dfg.value_type(arg1), types::F32); + assert_eq!(dfg.value_type(new1), types::I64); + assert_eq!(dfg.block_params(block), &[new1]); + + dfg.attach_block_param(block, arg1); + assert_eq!(dfg.block_params(block), &[new1, arg1]); + + let new2 = dfg.replace_block_param(arg1, types::I8); + assert_eq!(dfg.value_type(arg1), types::F32); + assert_eq!(dfg.value_type(new2), types::I8); + assert_eq!(dfg.block_params(block), &[new1, new2]); + + dfg.attach_block_param(block, arg1); + assert_eq!(dfg.block_params(block), &[new1, new2, arg1]); + + let new3 = dfg.replace_block_param(new2, types::I16); + assert_eq!(dfg.value_type(new1), types::I64); + assert_eq!(dfg.value_type(new2), types::I8); + assert_eq!(dfg.value_type(new3), types::I16); + assert_eq!(dfg.block_params(block), &[new1, new3, arg1]); + } + + #[test] + fn swap_remove_block_params() { + let mut dfg = DataFlowGraph::new(); + + let block = dfg.make_block(); + let arg1 = dfg.append_block_param(block, types::F32); + let arg2 = dfg.append_block_param(block, types::F32); + let arg3 = dfg.append_block_param(block, types::F32); + assert_eq!(dfg.block_params(block), &[arg1, arg2, arg3]); + + dfg.swap_remove_block_param(arg1); + assert_eq!(dfg.value_is_attached(arg1), false); + assert_eq!(dfg.value_is_attached(arg2), true); + assert_eq!(dfg.value_is_attached(arg3), true); + assert_eq!(dfg.block_params(block), &[arg3, arg2]); + dfg.swap_remove_block_param(arg2); + assert_eq!(dfg.value_is_attached(arg2), false); + assert_eq!(dfg.value_is_attached(arg3), true); + assert_eq!(dfg.block_params(block), &[arg3]); + dfg.swap_remove_block_param(arg3); + assert_eq!(dfg.value_is_attached(arg3), false); + assert_eq!(dfg.block_params(block), &[]); + } + + #[test] + fn aliases() { + use crate::ir::condcodes::IntCC; + use crate::ir::InstBuilder; + + let mut func = Function::new(); + let block0 
= func.dfg.make_block(); + let mut pos = FuncCursor::new(&mut func); + pos.insert_block(block0); + + // Build a little test program. + let v1 = pos.ins().iconst(types::I32, 42); + + // Make sure we can resolve value aliases even when values is empty. + assert_eq!(pos.func.dfg.resolve_aliases(v1), v1); + + let arg0 = pos.func.dfg.append_block_param(block0, types::I32); + let (s, c) = pos.ins().uadd_overflow(v1, arg0); + let iadd = match pos.func.dfg.value_def(s) { + ValueDef::Result(i, 0) => i, + _ => panic!(), + }; + + // Remove `c` from the result list. + pos.func.stencil.dfg.results[iadd].remove(1, &mut pos.func.stencil.dfg.value_lists); + + // Replace `uadd_overflow` with a normal `iadd` and an `icmp`. + pos.func.dfg.replace(iadd).iadd(v1, arg0); + let c2 = pos.ins().icmp(IntCC::Equal, s, v1); + pos.func.dfg.change_to_alias(c, c2); + + assert_eq!(pos.func.dfg.resolve_aliases(c2), c2); + assert_eq!(pos.func.dfg.resolve_aliases(c), c2); + } + + #[test] + fn cloning() { + use crate::ir::InstBuilder; + + let mut func = Function::new(); + let mut sig = Signature::new(crate::isa::CallConv::SystemV); + sig.params.push(ir::AbiParam::new(types::I32)); + let sig = func.import_signature(sig); + let block0 = func.dfg.make_block(); + let mut pos = FuncCursor::new(&mut func); + pos.insert_block(block0); + let v1 = pos.ins().iconst(types::I32, 0); + let v2 = pos.ins().iconst(types::I32, 1); + let call_inst = pos.ins().call_indirect(sig, v1, &[v1]); + let func = pos.func; + + let call_inst_dup = func.dfg.clone_inst(call_inst); + func.dfg.inst_args_mut(call_inst)[0] = v2; + assert_eq!(v1, func.dfg.inst_args(call_inst_dup)[0]); + } +} diff --git a/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/dynamic_type.rs b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/dynamic_type.rs new file mode 100644 index 000000000..d98ced809 --- /dev/null +++ b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/dynamic_type.rs @@ -0,0 +1,54 @@ +//! Dynamic IR types + +use crate::ir::entities::DynamicType; +use crate::ir::types::*; +use crate::ir::GlobalValue; +use crate::ir::PrimaryMap; + +#[cfg(feature = "enable-serde")] +use serde_derive::{Deserialize, Serialize}; + +/// A dynamic type object which has a base vector type and a scaling factor. +#[derive(Clone, PartialEq, Hash)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub struct DynamicTypeData { + /// Base vector type, this is the minimum size of the type. + pub base_vector_ty: Type, + /// The dynamic scaling factor of the base vector type. + pub dynamic_scale: GlobalValue, +} + +impl DynamicTypeData { + /// Create a new dynamic type. + pub fn new(base_vector_ty: Type, dynamic_scale: GlobalValue) -> Self { + assert!(base_vector_ty.is_vector()); + Self { + base_vector_ty, + dynamic_scale, + } + } + + /// Convert 'base_vector_ty' into a concrete dynamic vector type. + pub fn concrete(&self) -> Option { + self.base_vector_ty.vector_to_dynamic() + } +} + +/// All allocated dynamic types. +pub type DynamicTypes = PrimaryMap; + +/// Convert a dynamic-vector type to a fixed-vector type. 
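+// Editorial sketch, not part of the upstream cranelift-codegen sources: a small
+// illustration of how a dynamic vector type relates to its fixed base type,
+// using the `dynamic_to_fixed` helper defined just below. The global value used
+// as the scale is an arbitrary placeholder and is never dereferenced.
+#[cfg(test)]
+mod dynamic_type_example {
+    use super::*;
+    use crate::ir::types::{I32X4, I32X4XN};
+
+    #[test]
+    fn dynamic_and_fixed_vector_types_correspond() {
+        let scale = GlobalValue::with_number(0).unwrap();
+        let dyn_ty = DynamicTypeData::new(I32X4, scale);
+        // `concrete()` maps the base fixed vector to its dynamic counterpart...
+        assert_eq!(dyn_ty.concrete(), Some(I32X4XN));
+        // ...and `dynamic_to_fixed` maps the dynamic type back to the base.
+        assert_eq!(dynamic_to_fixed(I32X4XN), I32X4);
+    }
+}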
+pub fn dynamic_to_fixed(ty: Type) -> Type {
+    match ty {
+        I8X8XN => I8X8,
+        I8X16XN => I8X16,
+        I16X4XN => I16X4,
+        I16X8XN => I16X8,
+        I32X2XN => I32X2,
+        I32X4XN => I32X4,
+        I64X2XN => I64X2,
+        F32X4XN => F32X4,
+        F64X2XN => F64X2,
+        _ => unreachable!("unhandled type: {}", ty),
+    }
+}
diff --git a/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/entities.rs b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/entities.rs
new file mode 100644
index 000000000..005007471
--- /dev/null
+++ b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/entities.rs
@@ -0,0 +1,558 @@
+//! Cranelift IR entity references.
+//!
+//! Instructions in Cranelift IR need to reference other entities in the function. This can be other
+//! parts of the function like basic blocks or stack slots, or it can be external entities
+//! that are declared in the function preamble in the text format.
+//!
+//! These entity references in instruction operands are not implemented as Rust references both
+//! because Rust's ownership and mutability rules make it difficult, and because 64-bit pointers
+//! take up a lot of space, and we want a compact in-memory representation. Instead, entity
+//! references are structs wrapping a `u32` index into a table in the `Function` main data
+//! structure. There is a separate index type for each entity type, so we don't lose type safety.
+//!
+//! The `entities` module defines public types for the entity references along with constants
+//! representing an invalid reference. We prefer to use `Option<EntityRef>` whenever possible, but
+//! unfortunately that type is twice as large as the 32-bit index type on its own. Thus, compact
+//! data structures use the `PackedOption<EntityRef>` representation, while function arguments and
+//! return values prefer the more Rust-like `Option<EntityRef>` variant.
+//!
+//! The entity references all implement the `Display` trait in a way that matches the textual IR
+//! format.
+
+use crate::entity::entity_impl;
+use core::fmt;
+use core::u32;
+#[cfg(feature = "enable-serde")]
+use serde_derive::{Deserialize, Serialize};
+
+/// An opaque reference to a [basic block](https://en.wikipedia.org/wiki/Basic_block) in a
+/// [`Function`](super::function::Function).
+///
+/// You can get a `Block` using
+/// [`FunctionBuilder::create_block`](https://docs.rs/cranelift-frontend/*/cranelift_frontend/struct.FunctionBuilder.html#method.create_block)
+///
+/// While the order is stable, it is arbitrary and does not necessarily resemble the layout order.
+#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
+pub struct Block(u32);
+entity_impl!(Block, "block");
+
+impl Block {
+    /// Create a new block reference from its number. This corresponds to the `blockNN` representation.
+    ///
+    /// This method is for use by the parser.
+    pub fn with_number(n: u32) -> Option<Self> {
+        if n < u32::MAX {
+            Some(Self(n))
+        } else {
+            None
+        }
+    }
+}
+
+/// An opaque reference to an SSA value.
+/// +/// You can get a constant `Value` from the following +/// [`InstBuilder`](super::InstBuilder) instructions: +/// +/// - [`iconst`](super::InstBuilder::iconst) for integer constants +/// - [`f16const`](super::InstBuilder::f16const) for 16-bit float constants +/// - [`f32const`](super::InstBuilder::f32const) for 32-bit float constants +/// - [`f64const`](super::InstBuilder::f64const) for 64-bit float constants +/// - [`f128const`](super::InstBuilder::f128const) for 128-bit float constants +/// - [`vconst`](super::InstBuilder::vconst) for vector constants +/// - [`null`](super::InstBuilder::null) for null reference constants +/// +/// Any `InstBuilder` instruction that has an output will also return a `Value`. +/// +/// While the order is stable, it is arbitrary. +#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub struct Value(u32); +entity_impl!(Value, "v"); + +impl Value { + /// Create a value from its number representation. + /// This is the number in the `vNN` notation. + /// + /// This method is for use by the parser. + pub fn with_number(n: u32) -> Option { + if n < u32::MAX / 2 { + Some(Self(n)) + } else { + None + } + } +} + +/// An opaque reference to an instruction in a [`Function`](super::Function). +/// +/// Most usage of `Inst` is internal. `Inst`ructions are returned by +/// [`InstBuilder`](super::InstBuilder) instructions that do not return a +/// [`Value`], such as control flow and trap instructions, as well as instructions that return a +/// variable (potentially zero!) number of values, like call or call-indirect instructions. To get +/// the `Value` of such instructions, use [`inst_results`](super::DataFlowGraph::inst_results) or +/// its analogue in `cranelift_frontend::FuncBuilder`. +/// +/// [inst_comment]: https://github.com/bjorn3/rustc_codegen_cranelift/blob/0f8814fd6da3d436a90549d4bb19b94034f2b19c/src/pretty_clif.rs +/// +/// While the order is stable, it is arbitrary and does not necessarily resemble the layout order. +#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub struct Inst(u32); +entity_impl!(Inst, "inst"); + +/// An opaque reference to a stack slot. +/// +/// Stack slots represent an address on the +/// [call stack](https://en.wikipedia.org/wiki/Call_stack). +/// +/// `StackSlot`s can be created with +/// [`FunctionBuilder::create_sized_stack_slot`](https://docs.rs/cranelift-frontend/*/cranelift_frontend/struct.FunctionBuilder.html#method.create_sized_stack_slot) +/// or +/// [`FunctionBuilder::create_dynamic_stack_slot`](https://docs.rs/cranelift-frontend/*/cranelift_frontend/struct.FunctionBuilder.html#method.create_dynamic_stack_slot). +/// +/// `StackSlot`s are most often used with +/// [`stack_addr`](super::InstBuilder::stack_addr), +/// [`stack_load`](super::InstBuilder::stack_load), and +/// [`stack_store`](super::InstBuilder::stack_store). +/// +/// While the order is stable, it is arbitrary and does not necessarily resemble the stack order. +#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub struct StackSlot(u32); +entity_impl!(StackSlot, "ss"); + +impl StackSlot { + /// Create a new stack slot reference from its number. + /// + /// This method is for use by the parser. 
+ pub fn with_number(n: u32) -> Option { + if n < u32::MAX { + Some(Self(n)) + } else { + None + } + } +} + +/// An opaque reference to a dynamic stack slot. +#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub struct DynamicStackSlot(u32); +entity_impl!(DynamicStackSlot, "dss"); + +impl DynamicStackSlot { + /// Create a new stack slot reference from its number. + /// + /// This method is for use by the parser. + pub fn with_number(n: u32) -> Option { + if n < u32::MAX { + Some(Self(n)) + } else { + None + } + } +} + +/// An opaque reference to a dynamic type. +#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub struct DynamicType(u32); +entity_impl!(DynamicType, "dt"); + +impl DynamicType { + /// Create a new dynamic type reference from its number. + /// + /// This method is for use by the parser. + pub fn with_number(n: u32) -> Option { + if n < u32::MAX { + Some(Self(n)) + } else { + None + } + } +} + +/// An opaque reference to a global value. +/// +/// A `GlobalValue` is a [`Value`] that will be live across the entire +/// function lifetime. It can be preloaded from other global values. +/// +/// You can create a `GlobalValue` in the following ways: +/// +/// - When compiling to native code, you can use it for objects in static memory with +/// [`Module::declare_data_in_func`](https://docs.rs/cranelift-module/*/cranelift_module/trait.Module.html#method.declare_data_in_func). +/// - For any compilation target, it can be registered with +/// [`FunctionBuilder::create_global_value`](https://docs.rs/cranelift-frontend/*/cranelift_frontend/struct.FunctionBuilder.html#method.create_global_value). +/// +/// `GlobalValue`s can be retrieved with +/// [`InstBuilder:global_value`](super::InstBuilder::global_value). +/// +/// While the order is stable, it is arbitrary. +#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub struct GlobalValue(u32); +entity_impl!(GlobalValue, "gv"); + +impl GlobalValue { + /// Create a new global value reference from its number. + /// + /// This method is for use by the parser. + pub fn with_number(n: u32) -> Option { + if n < u32::MAX { + Some(Self(n)) + } else { + None + } + } +} + +/// An opaque reference to a memory type. +/// +/// A `MemoryType` is a descriptor of a struct layout in memory, with +/// types and proof-carrying-code facts optionally attached to the +/// fields. +#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub struct MemoryType(u32); +entity_impl!(MemoryType, "mt"); + +impl MemoryType { + /// Create a new memory type reference from its number. + /// + /// This method is for use by the parser. + pub fn with_number(n: u32) -> Option { + if n < u32::MAX { + Some(Self(n)) + } else { + None + } + } +} + +/// An opaque reference to a constant. +/// +/// You can store [`ConstantData`](super::ConstantData) in a +/// [`ConstantPool`](super::ConstantPool) for efficient storage and retrieval. +/// See [`ConstantPool::insert`](super::ConstantPool::insert). +/// +/// While the order is stable, it is arbitrary and does not necessarily resemble the order in which +/// the constants are written in the constant pool. 
+#[derive(Copy, Clone, PartialEq, Eq, Hash, Ord, PartialOrd)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub struct Constant(u32); +entity_impl!(Constant, "const"); + +impl Constant { + /// Create a const reference from its number. + /// + /// This method is for use by the parser. + pub fn with_number(n: u32) -> Option { + if n < u32::MAX { + Some(Self(n)) + } else { + None + } + } +} + +/// An opaque reference to an immediate. +/// +/// Some immediates (e.g. SIMD shuffle masks) are too large to store in the +/// [`InstructionData`](super::instructions::InstructionData) struct and therefore must be +/// tracked separately in [`DataFlowGraph::immediates`](super::dfg::DataFlowGraph). `Immediate` +/// provides a way to reference values stored there. +/// +/// While the order is stable, it is arbitrary. +#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub struct Immediate(u32); +entity_impl!(Immediate, "imm"); + +impl Immediate { + /// Create an immediate reference from its number. + /// + /// This method is for use by the parser. + pub fn with_number(n: u32) -> Option { + if n < u32::MAX { + Some(Self(n)) + } else { + None + } + } +} + +/// An opaque reference to a [jump table](https://en.wikipedia.org/wiki/Branch_table). +/// +/// `JumpTable`s are used for indirect branching and are specialized for dense, +/// 0-based jump offsets. If you want a jump table which doesn't start at 0, +/// or is not contiguous, consider using a [`Switch`](https://docs.rs/cranelift-frontend/*/cranelift_frontend/struct.Switch.html) instead. +/// +/// `JumpTable` are used with [`br_table`](super::InstBuilder::br_table). +/// +/// `JumpTable`s can be created with +/// [`create_jump_table`](https://docs.rs/cranelift-frontend/*/cranelift_frontend/struct.FunctionBuilder.html#method.create_jump_table). +/// +/// While the order is stable, it is arbitrary. +#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub struct JumpTable(u32); +entity_impl!(JumpTable, "jt"); + +impl JumpTable { + /// Create a new jump table reference from its number. + /// + /// This method is for use by the parser. + pub fn with_number(n: u32) -> Option { + if n < u32::MAX { + Some(Self(n)) + } else { + None + } + } +} + +/// An opaque reference to another [`Function`](super::Function). +/// +/// `FuncRef`s are used for [direct](super::InstBuilder::call) function calls +/// and by [`func_addr`](super::InstBuilder::func_addr) for use in +/// [indirect](super::InstBuilder::call_indirect) function calls. +/// +/// `FuncRef`s can be created with +/// +/// - [`FunctionBuilder::import_function`](https://docs.rs/cranelift-frontend/*/cranelift_frontend/struct.FunctionBuilder.html#method.import_function) +/// for external functions +/// - [`Module::declare_func_in_func`](https://docs.rs/cranelift-module/*/cranelift_module/trait.Module.html#method.declare_func_in_func) +/// for functions declared elsewhere in the same native +/// [`Module`](https://docs.rs/cranelift-module/*/cranelift_module/trait.Module.html) +/// +/// While the order is stable, it is arbitrary. +#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub struct FuncRef(u32); +entity_impl!(FuncRef, "fn"); + +impl FuncRef { + /// Create a new external function reference from its number. 
+ /// + /// This method is for use by the parser. + pub fn with_number(n: u32) -> Option { + if n < u32::MAX { + Some(Self(n)) + } else { + None + } + } +} + +/// A reference to an `UserExternalName`, declared with `Function::declare_imported_user_function`. +#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Default)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub struct UserExternalNameRef(u32); +entity_impl!(UserExternalNameRef, "userextname"); + +/// An opaque reference to a function [`Signature`](super::Signature). +/// +/// `SigRef`s are used to declare a function with +/// [`FunctionBuilder::import_function`](https://docs.rs/cranelift-frontend/*/cranelift_frontend/struct.FunctionBuilder.html#method.import_function) +/// as well as to make an [indirect function call](super::InstBuilder::call_indirect). +/// +/// `SigRef`s can be created with +/// [`FunctionBuilder::import_signature`](https://docs.rs/cranelift-frontend/*/cranelift_frontend/struct.FunctionBuilder.html#method.import_signature). +/// +/// You can retrieve the [`Signature`](super::Signature) that was used to create a `SigRef` with +/// [`FunctionBuilder::signature`](https://docs.rs/cranelift-frontend/*/cranelift_frontend/struct.FunctionBuilder.html#method.signature) or +/// [`func.dfg.signatures`](super::dfg::DataFlowGraph::signatures). +/// +/// While the order is stable, it is arbitrary. +#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub struct SigRef(u32); +entity_impl!(SigRef, "sig"); + +impl SigRef { + /// Create a new function signature reference from its number. + /// + /// This method is for use by the parser. + pub fn with_number(n: u32) -> Option { + if n < u32::MAX { + Some(Self(n)) + } else { + None + } + } +} + +/// An opaque reference to any of the entities defined in this module that can appear in CLIF IR. +#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub enum AnyEntity { + /// The whole function. + Function, + /// a basic block. + Block(Block), + /// An instruction. + Inst(Inst), + /// An SSA value. + Value(Value), + /// A stack slot. + StackSlot(StackSlot), + /// A dynamic stack slot. + DynamicStackSlot(DynamicStackSlot), + /// A dynamic type + DynamicType(DynamicType), + /// A Global value. + GlobalValue(GlobalValue), + /// A memory type. + MemoryType(MemoryType), + /// A jump table. + JumpTable(JumpTable), + /// A constant. + Constant(Constant), + /// An external function. + FuncRef(FuncRef), + /// A function call signature. 
+ SigRef(SigRef), + /// A function's stack limit + StackLimit, +} + +impl fmt::Display for AnyEntity { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match *self { + Self::Function => write!(f, "function"), + Self::Block(r) => r.fmt(f), + Self::Inst(r) => r.fmt(f), + Self::Value(r) => r.fmt(f), + Self::StackSlot(r) => r.fmt(f), + Self::DynamicStackSlot(r) => r.fmt(f), + Self::DynamicType(r) => r.fmt(f), + Self::GlobalValue(r) => r.fmt(f), + Self::MemoryType(r) => r.fmt(f), + Self::JumpTable(r) => r.fmt(f), + Self::Constant(r) => r.fmt(f), + Self::FuncRef(r) => r.fmt(f), + Self::SigRef(r) => r.fmt(f), + Self::StackLimit => write!(f, "stack_limit"), + } + } +} + +impl fmt::Debug for AnyEntity { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + (self as &dyn fmt::Display).fmt(f) + } +} + +impl From for AnyEntity { + fn from(r: Block) -> Self { + Self::Block(r) + } +} + +impl From for AnyEntity { + fn from(r: Inst) -> Self { + Self::Inst(r) + } +} + +impl From for AnyEntity { + fn from(r: Value) -> Self { + Self::Value(r) + } +} + +impl From for AnyEntity { + fn from(r: StackSlot) -> Self { + Self::StackSlot(r) + } +} + +impl From for AnyEntity { + fn from(r: DynamicStackSlot) -> Self { + Self::DynamicStackSlot(r) + } +} + +impl From for AnyEntity { + fn from(r: DynamicType) -> Self { + Self::DynamicType(r) + } +} + +impl From for AnyEntity { + fn from(r: GlobalValue) -> Self { + Self::GlobalValue(r) + } +} + +impl From for AnyEntity { + fn from(r: MemoryType) -> Self { + Self::MemoryType(r) + } +} + +impl From for AnyEntity { + fn from(r: JumpTable) -> Self { + Self::JumpTable(r) + } +} + +impl From for AnyEntity { + fn from(r: Constant) -> Self { + Self::Constant(r) + } +} + +impl From for AnyEntity { + fn from(r: FuncRef) -> Self { + Self::FuncRef(r) + } +} + +impl From for AnyEntity { + fn from(r: SigRef) -> Self { + Self::SigRef(r) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use alloc::string::ToString; + + #[test] + fn value_with_number() { + assert_eq!(Value::with_number(0).unwrap().to_string(), "v0"); + assert_eq!(Value::with_number(1).unwrap().to_string(), "v1"); + + assert_eq!(Value::with_number(u32::MAX / 2), None); + assert!(Value::with_number(u32::MAX / 2 - 1).is_some()); + } + + #[test] + fn memory() { + use crate::packed_option::PackedOption; + use core::mem; + // This is the whole point of `PackedOption`. + assert_eq!( + mem::size_of::(), + mem::size_of::>() + ); + } + + #[test] + fn memory_option() { + use core::mem; + // PackedOption is used because Option is twice as large + // as EntityRef. If this ever fails to be the case, this test will fail. + assert_eq!(mem::size_of::() * 2, mem::size_of::>()); + } + + #[test] + fn constant_with_number() { + assert_eq!(Constant::with_number(0).unwrap().to_string(), "const0"); + assert_eq!(Constant::with_number(1).unwrap().to_string(), "const1"); + } +} diff --git a/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/extfunc.rs b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/extfunc.rs new file mode 100644 index 000000000..319ec4038 --- /dev/null +++ b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/extfunc.rs @@ -0,0 +1,408 @@ +//! External function calls. +//! +//! To a Cranelift function, all functions are "external". Directly called functions must be +//! declared in the preamble, and all function calls must have a signature. +//! +//! This module declares the data types used to represent external functions and call signatures. 
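+
+// Editorial sketch, not part of the upstream cranelift-codegen sources: a short
+// demonstration of the special-purpose parameter queries on the `Signature`
+// type declared below. The calling convention and value types are arbitrary.
+#[cfg(test)]
+mod signature_special_param_example {
+    use super::*;
+    use crate::ir::types::{I32, I64};
+
+    #[test]
+    fn vmctx_parameter_is_reported_as_special() {
+        let mut sig = Signature::new(CallConv::SystemV);
+        // One special-purpose parameter (the VM context pointer) and one normal one.
+        sig.params
+            .push(AbiParam::special(I64, ArgumentPurpose::VMContext));
+        sig.params.push(AbiParam::new(I32));
+        // Two normal returns make this a multi-return signature.
+        sig.returns.push(AbiParam::new(I32));
+        sig.returns.push(AbiParam::new(I32));
+
+        assert_eq!(sig.special_param_index(ArgumentPurpose::VMContext), Some(0));
+        assert!(sig.uses_special_param(ArgumentPurpose::VMContext));
+        assert_eq!(sig.num_special_params(), 1);
+        assert!(!sig.uses_struct_return_param());
+        assert!(sig.is_multi_return());
+    }
+}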
+ +use crate::ir::{ExternalName, SigRef, Type}; +use crate::isa::CallConv; +use alloc::vec::Vec; +use core::fmt; +use core::str::FromStr; +#[cfg(feature = "enable-serde")] +use serde_derive::{Deserialize, Serialize}; + +use super::function::FunctionParameters; + +/// Function signature. +/// +/// The function signature describes the types of formal parameters and return values along with +/// other details that are needed to call a function correctly. +/// +/// A signature can optionally include ISA-specific ABI information which specifies exactly how +/// arguments and return values are passed. +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub struct Signature { + /// The arguments passed to the function. + pub params: Vec, + /// Values returned from the function. + pub returns: Vec, + + /// Calling convention. + pub call_conv: CallConv, +} + +impl Signature { + /// Create a new blank signature. + pub fn new(call_conv: CallConv) -> Self { + Self { + params: Vec::new(), + returns: Vec::new(), + call_conv, + } + } + + /// Clear the signature so it is identical to a fresh one returned by `new()`. + pub fn clear(&mut self, call_conv: CallConv) { + self.params.clear(); + self.returns.clear(); + self.call_conv = call_conv; + } + + /// Find the index of a presumed unique special-purpose parameter. + pub fn special_param_index(&self, purpose: ArgumentPurpose) -> Option { + self.params.iter().rposition(|arg| arg.purpose == purpose) + } + + /// Find the index of a presumed unique special-purpose parameter. + pub fn special_return_index(&self, purpose: ArgumentPurpose) -> Option { + self.returns.iter().rposition(|arg| arg.purpose == purpose) + } + + /// Does this signature have a parameter whose `ArgumentPurpose` is + /// `purpose`? + pub fn uses_special_param(&self, purpose: ArgumentPurpose) -> bool { + self.special_param_index(purpose).is_some() + } + + /// Does this signature have a return whose `ArgumentPurpose` is `purpose`? + pub fn uses_special_return(&self, purpose: ArgumentPurpose) -> bool { + self.special_return_index(purpose).is_some() + } + + /// How many special parameters does this function have? + pub fn num_special_params(&self) -> usize { + self.params + .iter() + .filter(|p| p.purpose != ArgumentPurpose::Normal) + .count() + } + + /// How many special returns does this function have? + pub fn num_special_returns(&self) -> usize { + self.returns + .iter() + .filter(|r| r.purpose != ArgumentPurpose::Normal) + .count() + } + + /// Does this signature take an struct return pointer parameter? + pub fn uses_struct_return_param(&self) -> bool { + self.uses_special_param(ArgumentPurpose::StructReturn) + } + + /// Does this return more than one normal value? (Pre-struct return + /// legalization) + pub fn is_multi_return(&self) -> bool { + self.returns + .iter() + .filter(|r| r.purpose == ArgumentPurpose::Normal) + .count() + > 1 + } +} + +fn write_list(f: &mut fmt::Formatter, args: &[AbiParam]) -> fmt::Result { + match args.split_first() { + None => {} + Some((first, rest)) => { + write!(f, "{first}")?; + for arg in rest { + write!(f, ", {arg}")?; + } + } + } + Ok(()) +} + +impl fmt::Display for Signature { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "(")?; + write_list(f, &self.params)?; + write!(f, ")")?; + if !self.returns.is_empty() { + write!(f, " -> ")?; + write_list(f, &self.returns)?; + } + write!(f, " {}", self.call_conv) + } +} + +/// Function parameter or return value descriptor. 
+/// +/// This describes the value type being passed to or from a function along with flags that affect +/// how the argument is passed. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub struct AbiParam { + /// Type of the argument value. + pub value_type: Type, + /// Special purpose of argument, or `Normal`. + pub purpose: ArgumentPurpose, + /// Method for extending argument to a full register. + pub extension: ArgumentExtension, +} + +impl AbiParam { + /// Create a parameter with default flags. + pub fn new(vt: Type) -> Self { + Self { + value_type: vt, + extension: ArgumentExtension::None, + purpose: ArgumentPurpose::Normal, + } + } + + /// Create a special-purpose parameter that is not (yet) bound to a specific register. + pub fn special(vt: Type, purpose: ArgumentPurpose) -> Self { + Self { + value_type: vt, + extension: ArgumentExtension::None, + purpose, + } + } + + /// Convert `self` to a parameter with the `uext` flag set. + pub fn uext(self) -> Self { + debug_assert!(self.value_type.is_int(), "uext on {} arg", self.value_type); + Self { + extension: ArgumentExtension::Uext, + ..self + } + } + + /// Convert `self` to a parameter type with the `sext` flag set. + pub fn sext(self) -> Self { + debug_assert!(self.value_type.is_int(), "sext on {} arg", self.value_type); + Self { + extension: ArgumentExtension::Sext, + ..self + } + } +} + +impl fmt::Display for AbiParam { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.value_type)?; + match self.extension { + ArgumentExtension::None => {} + ArgumentExtension::Uext => write!(f, " uext")?, + ArgumentExtension::Sext => write!(f, " sext")?, + } + if self.purpose != ArgumentPurpose::Normal { + write!(f, " {}", self.purpose)?; + } + Ok(()) + } +} + +/// Function argument extension options. +/// +/// On some architectures, small integer function arguments and/or return values are extended to +/// the width of a general-purpose register. +/// +/// This attribute specifies how an argument or return value should be extended *if the platform +/// and ABI require it*. Because the frontend (CLIF generator) does not know anything about the +/// particulars of the target's ABI, and the CLIF should be platform-independent, these attributes +/// specify *how* to extend (according to the signedness of the original program) rather than +/// *whether* to extend. +#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub enum ArgumentExtension { + /// No extension, high bits are indeterminate. + None, + /// Unsigned extension: high bits in register are 0. + Uext, + /// Signed extension: high bits in register replicate sign bit. + Sext, +} + +/// The special purpose of a function argument. +/// +/// Function arguments and return values are used to pass user program values between functions, +/// but they are also used to represent special registers with significance to the ABI such as +/// frame pointers and callee-saved registers. +/// +/// The argument purpose is used to indicate any special meaning of an argument or return value. +#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub enum ArgumentPurpose { + /// A normal user program value passed to or from a function. + Normal, + + /// A C struct passed as argument. 
+ /// + /// Note that this should only be used when interacting with code following + /// a C ABI which is expecting a struct passed *by value*. + StructArgument( + /// The size, in bytes, of the struct. + u32, + ), + + /// Struct return pointer. + /// + /// When a function needs to return more data than will fit in registers, the caller passes a + /// pointer to a memory location where the return value can be written. In some ABIs, this + /// struct return pointer is passed in a specific register. + /// + /// This argument kind can also appear as a return value for ABIs that require a function with + /// a `StructReturn` pointer argument to also return that pointer in a register. + StructReturn, + + /// A VM context pointer. + /// + /// This is a pointer to a context struct containing details about the current sandbox. It is + /// used as a base pointer for `vmctx` global values. + VMContext, +} + +impl fmt::Display for ArgumentPurpose { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str(match self { + Self::Normal => "normal", + Self::StructArgument(size) => return write!(f, "sarg({size})"), + Self::StructReturn => "sret", + Self::VMContext => "vmctx", + }) + } +} + +impl FromStr for ArgumentPurpose { + type Err = (); + fn from_str(s: &str) -> Result { + match s { + "normal" => Ok(Self::Normal), + "sret" => Ok(Self::StructReturn), + "vmctx" => Ok(Self::VMContext), + _ if s.starts_with("sarg(") => { + if !s.ends_with(")") { + return Err(()); + } + // Parse 'sarg(size)' + let size: u32 = s["sarg(".len()..s.len() - 1].parse().map_err(|_| ())?; + Ok(Self::StructArgument(size)) + } + _ => Err(()), + } + } +} + +/// An external function. +/// +/// Information about a function that can be called directly with a direct `call` instruction. +#[derive(Clone, Debug, PartialEq, Hash)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub struct ExtFuncData { + /// Name of the external function. + pub name: ExternalName, + /// Call signature of function. + pub signature: SigRef, + /// Will this function be defined nearby, such that it will always be a certain distance away, + /// after linking? If so, references to it can avoid going through a GOT or PLT. Note that + /// symbols meant to be preemptible cannot be considered colocated. + /// + /// If `true`, some backends may use relocation forms that have limited range. The exact + /// distance depends on the code model in use. Currently on AArch64, for example, Cranelift + /// uses a custom code model supporting up to +/- 128MB displacements. If it is unknown how + /// far away the target will be, it is best not to set the `colocated` flag; in general, this + /// flag is best used when the target is known to be in the same unit of code generation, such + /// as a Wasm module. + /// + /// See the documentation for `RelocDistance` for more details. A `colocated` flag value of + /// `true` implies `RelocDistance::Near`. + pub colocated: bool, +} + +impl ExtFuncData { + /// Returns a displayable version of the `ExtFuncData`, with or without extra context to + /// prettify the output. + pub fn display<'a>( + &'a self, + params: Option<&'a FunctionParameters>, + ) -> DisplayableExtFuncData<'a> { + DisplayableExtFuncData { + ext_func: self, + params, + } + } +} + +/// A displayable `ExtFuncData`, with extra context to prettify the output. 
+pub struct DisplayableExtFuncData<'a> { + ext_func: &'a ExtFuncData, + params: Option<&'a FunctionParameters>, +} + +impl<'a> fmt::Display for DisplayableExtFuncData<'a> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + if self.ext_func.colocated { + write!(f, "colocated ")?; + } + write!( + f, + "{} {}", + self.ext_func.name.display(self.params), + self.ext_func.signature + ) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::ir::types::{F32, I32, I8}; + use alloc::string::ToString; + + #[test] + fn argument_type() { + let t = AbiParam::new(I32); + assert_eq!(t.to_string(), "i32"); + let mut t = t.uext(); + assert_eq!(t.to_string(), "i32 uext"); + assert_eq!(t.sext().to_string(), "i32 sext"); + t.purpose = ArgumentPurpose::StructReturn; + assert_eq!(t.to_string(), "i32 uext sret"); + } + + #[test] + fn argument_purpose() { + let all_purpose = [ + (ArgumentPurpose::Normal, "normal"), + (ArgumentPurpose::StructReturn, "sret"), + (ArgumentPurpose::VMContext, "vmctx"), + (ArgumentPurpose::StructArgument(42), "sarg(42)"), + ]; + for &(e, n) in &all_purpose { + assert_eq!(e.to_string(), n); + assert_eq!(Ok(e), n.parse()); + } + } + + #[test] + fn call_conv() { + for &cc in &[ + CallConv::Fast, + CallConv::Cold, + CallConv::SystemV, + CallConv::WindowsFastcall, + ] { + assert_eq!(Ok(cc), cc.to_string().parse()) + } + } + + #[test] + fn signatures() { + let mut sig = Signature::new(CallConv::WindowsFastcall); + assert_eq!(sig.to_string(), "() windows_fastcall"); + sig.params.push(AbiParam::new(I32)); + assert_eq!(sig.to_string(), "(i32) windows_fastcall"); + sig.returns.push(AbiParam::new(F32)); + assert_eq!(sig.to_string(), "(i32) -> f32 windows_fastcall"); + sig.params.push(AbiParam::new(I32.by(4).unwrap())); + assert_eq!(sig.to_string(), "(i32, i32x4) -> f32 windows_fastcall"); + sig.returns.push(AbiParam::new(I8)); + assert_eq!(sig.to_string(), "(i32, i32x4) -> f32, i8 windows_fastcall"); + } +} diff --git a/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/extname.rs b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/extname.rs new file mode 100644 index 000000000..8484376b5 --- /dev/null +++ b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/extname.rs @@ -0,0 +1,333 @@ +//! External names. +//! +//! These are identifiers for declaring entities defined outside the current +//! function. The name of an external declaration doesn't have any meaning to +//! Cranelift, which compiles functions independently. + +use crate::ir::{KnownSymbol, LibCall}; +use alloc::boxed::Box; +use core::fmt::{self, Write}; +use core::str::FromStr; + +use cranelift_entity::EntityRef as _; +#[cfg(feature = "enable-serde")] +use serde_derive::{Deserialize, Serialize}; + +use super::entities::UserExternalNameRef; +use super::function::FunctionParameters; + +/// An explicit name for a user-defined function, be it defined in code or in CLIF text. +/// +/// This is used both for naming a function (for debugging purposes) and for declaring external +/// functions. In the latter case, this becomes an `ExternalName`, which gets embedded in +/// relocations later, etc. +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub enum UserFuncName { + /// A user-defined name, with semantics left to the user. + User(UserExternalName), + /// A name for a test case, mostly intended for Cranelift testing. 
+ Testcase(TestcaseName), +} + +impl UserFuncName { + /// Creates a new external name from a sequence of bytes. Caller is expected + /// to guarantee bytes are only ascii alphanumeric or `_`. + pub fn testcase>(v: T) -> Self { + Self::Testcase(TestcaseName::new(v)) + } + + /// Create a new external name from a user-defined external function reference. + pub fn user(namespace: u32, index: u32) -> Self { + Self::User(UserExternalName::new(namespace, index)) + } + + /// Get a `UserExternalName` if this is a user-defined name. + pub fn get_user(&self) -> Option<&UserExternalName> { + match self { + UserFuncName::User(user) => Some(user), + UserFuncName::Testcase(_) => None, + } + } +} + +impl Default for UserFuncName { + fn default() -> Self { + UserFuncName::User(UserExternalName::default()) + } +} + +impl fmt::Display for UserFuncName { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + UserFuncName::User(user) => user.fmt(f), + UserFuncName::Testcase(testcase) => testcase.fmt(f), + } + } +} + +/// An external name in a user-defined symbol table. +/// +/// Cranelift does not interpret these numbers in any way, so they can represent arbitrary values. +#[derive(Debug, Clone, PartialEq, Eq, Hash, Default)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub struct UserExternalName { + /// Arbitrary. + pub namespace: u32, + /// Arbitrary. + pub index: u32, +} + +impl UserExternalName { + /// Creates a new [UserExternalName]. + pub fn new(namespace: u32, index: u32) -> Self { + Self { namespace, index } + } +} + +impl fmt::Display for UserExternalName { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "u{}:{}", self.namespace, self.index) + } +} + +/// A name for a test case. +#[derive(Clone, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub struct TestcaseName(Box<[u8]>); + +impl fmt::Display for TestcaseName { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_char('%')?; + f.write_str(std::str::from_utf8(&self.0).unwrap()) + } +} + +impl fmt::Debug for TestcaseName { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{self}") + } +} + +impl TestcaseName { + pub(crate) fn new>(v: T) -> Self { + Self(v.as_ref().into()) + } +} + +/// The name of an external is either a reference to a user-defined symbol +/// table, or a short sequence of ascii bytes so that test cases do not have +/// to keep track of a symbol table. +/// +/// External names are primarily used as keys by code using Cranelift to map +/// from a `cranelift_codegen::ir::FuncRef` or similar to additional associated +/// data. +/// +/// External names can also serve as a primitive testing and debugging tool. +/// In particular, many `.clif` test files use function names to identify +/// functions. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub enum ExternalName { + /// A reference to a name in a user-defined symbol table. + User(UserExternalNameRef), + /// A test case function name of up to a hardcoded amount of ascii + /// characters. This is not intended to be used outside test cases. + TestCase(TestcaseName), + /// A well-known runtime library function. + LibCall(LibCall), + /// A well-known symbol. 
+ KnownSymbol(KnownSymbol), +} + +impl Default for ExternalName { + fn default() -> Self { + Self::User(UserExternalNameRef::new(0)) + } +} + +impl ExternalName { + /// Creates a new external name from a sequence of bytes. Caller is expected + /// to guarantee bytes are only ascii alphanumeric or `_`. + /// + /// # Examples + /// + /// ```rust + /// # use cranelift_codegen::ir::ExternalName; + /// // Create `ExternalName` from a string. + /// let name = ExternalName::testcase("hello"); + /// assert_eq!(name.display(None).to_string(), "%hello"); + /// ``` + pub fn testcase>(v: T) -> Self { + Self::TestCase(TestcaseName::new(v)) + } + + /// Create a new external name from a user-defined external function reference. + /// + /// # Examples + /// ```rust + /// # use cranelift_codegen::ir::{ExternalName, UserExternalNameRef}; + /// let user_func_ref: UserExternalNameRef = Default::default(); // usually obtained with `Function::declare_imported_user_function()` + /// let name = ExternalName::user(user_func_ref); + /// assert_eq!(name.display(None).to_string(), "userextname0"); + /// ``` + pub fn user(func_ref: UserExternalNameRef) -> Self { + Self::User(func_ref) + } + + /// Returns a display for the current `ExternalName`, with extra context to prettify the + /// output. + pub fn display<'a>( + &'a self, + params: Option<&'a FunctionParameters>, + ) -> DisplayableExternalName<'a> { + DisplayableExternalName { name: self, params } + } +} + +/// An `ExternalName` that has enough context to be displayed. +pub struct DisplayableExternalName<'a> { + name: &'a ExternalName, + params: Option<&'a FunctionParameters>, +} + +impl<'a> fmt::Display for DisplayableExternalName<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match &self.name { + ExternalName::User(func_ref) => { + if let Some(params) = self.params { + let name = ¶ms.user_named_funcs()[*func_ref]; + write!(f, "u{}:{}", name.namespace, name.index) + } else { + // Best effort. 
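+                    // Without a `FunctionParameters` table the namespace/index
+                    // pair cannot be recovered, so this prints the raw entity
+                    // reference (e.g. "userextname0", as exercised by the
+                    // `display_user` test below).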
+ write!(f, "{}", *func_ref) + } + } + ExternalName::TestCase(testcase) => testcase.fmt(f), + ExternalName::LibCall(lc) => write!(f, "%{lc}"), + ExternalName::KnownSymbol(ks) => write!(f, "%{ks}"), + } + } +} + +impl FromStr for ExternalName { + type Err = (); + + fn from_str(s: &str) -> Result { + // Try to parse as a known symbol + if let Ok(ks) = s.parse() { + return Ok(Self::KnownSymbol(ks)); + } + + // Try to parse as a libcall name + if let Ok(lc) = s.parse() { + return Ok(Self::LibCall(lc)); + } + + // Otherwise its a test case name + Ok(Self::testcase(s.as_bytes())) + } +} + +#[cfg(test)] +mod tests { + use super::ExternalName; + use crate::ir::{ + entities::UserExternalNameRef, function::FunctionParameters, LibCall, UserExternalName, + }; + use alloc::string::ToString; + use core::u32; + use cranelift_entity::EntityRef as _; + + #[cfg(target_pointer_width = "64")] + #[test] + fn externalname_size() { + assert_eq!(core::mem::size_of::(), 24); + } + + #[test] + fn display_testcase() { + assert_eq!(ExternalName::testcase("").display(None).to_string(), "%"); + assert_eq!(ExternalName::testcase("x").display(None).to_string(), "%x"); + assert_eq!( + ExternalName::testcase("x_1").display(None).to_string(), + "%x_1" + ); + assert_eq!( + ExternalName::testcase("longname12345678") + .display(None) + .to_string(), + "%longname12345678" + ); + assert_eq!( + ExternalName::testcase("longname123456789") + .display(None) + .to_string(), + "%longname123456789" + ); + } + + #[test] + fn display_user() { + assert_eq!( + ExternalName::user(UserExternalNameRef::new(0)) + .display(None) + .to_string(), + "userextname0" + ); + assert_eq!( + ExternalName::user(UserExternalNameRef::new(1)) + .display(None) + .to_string(), + "userextname1" + ); + assert_eq!( + ExternalName::user(UserExternalNameRef::new((u32::MAX - 1) as _)) + .display(None) + .to_string(), + "userextname4294967294" + ); + + let mut func_params = FunctionParameters::new(); + + // ref 0 + func_params.ensure_user_func_name(UserExternalName { + namespace: 13, + index: 37, + }); + + // ref 1 + func_params.ensure_user_func_name(UserExternalName { + namespace: 2, + index: 4, + }); + + assert_eq!( + ExternalName::user(UserExternalNameRef::new(0)) + .display(Some(&func_params)) + .to_string(), + "u13:37" + ); + + assert_eq!( + ExternalName::user(UserExternalNameRef::new(1)) + .display(Some(&func_params)) + .to_string(), + "u2:4" + ); + } + + #[test] + fn parsing() { + assert_eq!( + "FloorF32".parse(), + Ok(ExternalName::LibCall(LibCall::FloorF32)) + ); + assert_eq!( + ExternalName::LibCall(LibCall::FloorF32) + .display(None) + .to_string(), + "%FloorF32" + ); + } +} diff --git a/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/function.rs b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/function.rs new file mode 100644 index 000000000..ebf7f08cb --- /dev/null +++ b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/function.rs @@ -0,0 +1,495 @@ +//! Intermediate representation of a function. +//! +//! The `Function` struct defined in this module owns all of its basic blocks and +//! instructions. 
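Illustrative aside (not part of the vendored diff): a minimal usage sketch for the external-name types defined in `src/ir/extname.rs` above. The `cranelift_codegen::ir::{ExternalName, UserFuncName}` import paths are assumed public re-exports of the items shown in this diff.

```rust
use cranelift_codegen::ir::{ExternalName, UserFuncName};

fn external_name_sketch() {
    // Name a function in a user-defined symbol table: namespace 7, index 3.
    let func_name = UserFuncName::user(7, 3);
    assert_eq!(func_name.to_string(), "u7:3");

    // Test-case names are mostly for `.clif` tests and display with a `%` prefix.
    let test_name = ExternalName::testcase("my_helper");
    assert_eq!(test_name.display(None).to_string(), "%my_helper");

    // Parsing falls back to a test-case name when the string is neither a
    // known symbol nor a libcall name.
    let parsed: ExternalName = "not_a_libcall".parse().unwrap();
    assert_eq!(parsed.display(None).to_string(), "%not_a_libcall");
}
```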
+ +use crate::entity::{PrimaryMap, SecondaryMap}; +use crate::ir::{ + self, pcc::Fact, Block, DataFlowGraph, DynamicStackSlot, DynamicStackSlotData, + DynamicStackSlots, DynamicType, ExtFuncData, FuncRef, GlobalValue, GlobalValueData, Inst, + JumpTable, JumpTableData, Layout, MemoryType, MemoryTypeData, SigRef, Signature, SourceLocs, + StackSlot, StackSlotData, StackSlots, Type, +}; +use crate::isa::CallConv; +use crate::write::write_function; +use crate::HashMap; +#[cfg(feature = "enable-serde")] +use alloc::string::String; +use core::fmt; + +#[cfg(feature = "enable-serde")] +use serde::de::{Deserializer, Error}; +#[cfg(feature = "enable-serde")] +use serde::ser::Serializer; +#[cfg(feature = "enable-serde")] +use serde::{Deserialize, Serialize}; + +use super::entities::UserExternalNameRef; +use super::extname::UserFuncName; +use super::{RelSourceLoc, SourceLoc, UserExternalName}; + +/// A version marker used to ensure that serialized clif ir is never deserialized with a +/// different version of Cranelift. +#[derive(Default, Copy, Clone, Debug, PartialEq, Hash)] +pub struct VersionMarker; + +#[cfg(feature = "enable-serde")] +impl Serialize for VersionMarker { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + crate::VERSION.serialize(serializer) + } +} + +#[cfg(feature = "enable-serde")] +impl<'de> Deserialize<'de> for VersionMarker { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + let version = String::deserialize(deserializer)?; + if version != crate::VERSION { + return Err(D::Error::custom(&format!( + "Expected a clif ir function for version {}, found one for version {}", + crate::VERSION, + version, + ))); + } + Ok(VersionMarker) + } +} + +/// Function parameters used when creating this function, and that will become applied after +/// compilation to materialize the final `CompiledCode`. +#[derive(Clone, PartialEq)] +#[cfg_attr( + feature = "enable-serde", + derive(serde_derive::Serialize, serde_derive::Deserialize) +)] +pub struct FunctionParameters { + /// The first `SourceLoc` appearing in the function, serving as a base for every relative + /// source loc in the function. + base_srcloc: Option, + + /// External user-defined function references. + user_named_funcs: PrimaryMap, + + /// Inverted mapping of `user_named_funcs`, to deduplicate internally. + user_ext_name_to_ref: HashMap, +} + +impl FunctionParameters { + /// Creates a new `FunctionParameters` with the given name. + pub fn new() -> Self { + Self { + base_srcloc: None, + user_named_funcs: Default::default(), + user_ext_name_to_ref: Default::default(), + } + } + + /// Returns the base `SourceLoc`. + /// + /// If it was never explicitly set with `ensure_base_srcloc`, will return an invalid + /// `SourceLoc`. + pub fn base_srcloc(&self) -> SourceLoc { + self.base_srcloc.unwrap_or_default() + } + + /// Sets the base `SourceLoc`, if not set yet, and returns the base value. + pub fn ensure_base_srcloc(&mut self, srcloc: SourceLoc) -> SourceLoc { + match self.base_srcloc { + Some(val) => val, + None => { + self.base_srcloc = Some(srcloc); + srcloc + } + } + } + + /// Retrieve a `UserExternalNameRef` for the given name, or add a new one. + /// + /// This method internally deduplicates same `UserExternalName` so they map to the same + /// reference. 
+ pub fn ensure_user_func_name(&mut self, name: UserExternalName) -> UserExternalNameRef { + if let Some(reff) = self.user_ext_name_to_ref.get(&name) { + *reff + } else { + let reff = self.user_named_funcs.push(name.clone()); + self.user_ext_name_to_ref.insert(name, reff); + reff + } + } + + /// Resets an already existing user function name to a new value. + pub fn reset_user_func_name(&mut self, index: UserExternalNameRef, name: UserExternalName) { + if let Some(prev_name) = self.user_named_funcs.get_mut(index) { + self.user_ext_name_to_ref.remove(prev_name); + *prev_name = name.clone(); + self.user_ext_name_to_ref.insert(name, index); + } + } + + /// Returns the internal mapping of `UserExternalNameRef` to `UserExternalName`. + pub fn user_named_funcs(&self) -> &PrimaryMap { + &self.user_named_funcs + } + + fn clear(&mut self) { + self.base_srcloc = None; + self.user_named_funcs.clear(); + self.user_ext_name_to_ref.clear(); + } +} + +/// Function fields needed when compiling a function. +/// +/// Additionally, these fields can be the same for two functions that would be compiled the same +/// way, and finalized by applying `FunctionParameters` onto their `CompiledCodeStencil`. +#[derive(Clone, PartialEq, Hash)] +#[cfg_attr( + feature = "enable-serde", + derive(serde_derive::Serialize, serde_derive::Deserialize) +)] +pub struct FunctionStencil { + /// A version marker used to ensure that serialized clif ir is never deserialized with a + /// different version of Cranelift. + // Note: This must be the first field to ensure that Serde will deserialize it before + // attempting to deserialize other fields that are potentially changed between versions. + pub version_marker: VersionMarker, + + /// Signature of this function. + pub signature: Signature, + + /// Sized stack slots allocated in this function. + pub sized_stack_slots: StackSlots, + + /// Dynamic stack slots allocated in this function. + pub dynamic_stack_slots: DynamicStackSlots, + + /// Global values referenced. + pub global_values: PrimaryMap, + + /// Global value proof-carrying-code facts. + pub global_value_facts: SecondaryMap>, + + /// Memory types for proof-carrying code. + pub memory_types: PrimaryMap, + + /// Data flow graph containing the primary definition of all instructions, blocks and values. + pub dfg: DataFlowGraph, + + /// Layout of blocks and instructions in the function body. + pub layout: Layout, + + /// Source locations. + /// + /// Track the original source location for each instruction. The source locations are not + /// interpreted by Cranelift, only preserved. + pub srclocs: SourceLocs, + + /// An optional global value which represents an expression evaluating to + /// the stack limit for this function. This `GlobalValue` will be + /// interpreted in the prologue, if necessary, to insert a stack check to + /// ensure that a trap happens if the stack pointer goes below the + /// threshold specified here. + pub stack_limit: Option, +} + +impl FunctionStencil { + fn clear(&mut self) { + self.signature.clear(CallConv::Fast); + self.sized_stack_slots.clear(); + self.dynamic_stack_slots.clear(); + self.global_values.clear(); + self.global_value_facts.clear(); + self.memory_types.clear(); + self.dfg.clear(); + self.layout.clear(); + self.srclocs.clear(); + self.stack_limit = None; + } + + /// Creates a jump table in the function, to be used by `br_table` instructions. 
+ pub fn create_jump_table(&mut self, data: JumpTableData) -> JumpTable { + self.dfg.jump_tables.push(data) + } + + /// Creates a sized stack slot in the function, to be used by `stack_load`, `stack_store` + /// and `stack_addr` instructions. + pub fn create_sized_stack_slot(&mut self, data: StackSlotData) -> StackSlot { + self.sized_stack_slots.push(data) + } + + /// Creates a dynamic stack slot in the function, to be used by `dynamic_stack_load`, + /// `dynamic_stack_store` and `dynamic_stack_addr` instructions. + pub fn create_dynamic_stack_slot(&mut self, data: DynamicStackSlotData) -> DynamicStackSlot { + self.dynamic_stack_slots.push(data) + } + + /// Adds a signature which can later be used to declare an external function import. + pub fn import_signature(&mut self, signature: Signature) -> SigRef { + self.dfg.signatures.push(signature) + } + + /// Declares a global value accessible to the function. + pub fn create_global_value(&mut self, data: GlobalValueData) -> GlobalValue { + self.global_values.push(data) + } + + /// Declares a memory type for use by the function. + pub fn create_memory_type(&mut self, data: MemoryTypeData) -> MemoryType { + self.memory_types.push(data) + } + + /// Find the global dyn_scale value associated with given DynamicType. + pub fn get_dyn_scale(&self, ty: DynamicType) -> GlobalValue { + self.dfg.dynamic_types.get(ty).unwrap().dynamic_scale + } + + /// Find the global dyn_scale for the given stack slot. + pub fn get_dynamic_slot_scale(&self, dss: DynamicStackSlot) -> GlobalValue { + let dyn_ty = self.dynamic_stack_slots.get(dss).unwrap().dyn_ty; + self.get_dyn_scale(dyn_ty) + } + + /// Get a concrete `Type` from a user defined `DynamicType`. + pub fn get_concrete_dynamic_ty(&self, ty: DynamicType) -> Option { + self.dfg + .dynamic_types + .get(ty) + .unwrap_or_else(|| panic!("Undeclared dynamic vector type: {ty}")) + .concrete() + } + + /// Find a presumed unique special-purpose function parameter value. + /// + /// Returns the value of the last `purpose` parameter, or `None` if no such parameter exists. + pub fn special_param(&self, purpose: ir::ArgumentPurpose) -> Option { + let entry = self.layout.entry_block().expect("Function is empty"); + self.signature + .special_param_index(purpose) + .map(|i| self.dfg.block_params(entry)[i]) + } + + /// Starts collection of debug information. + pub fn collect_debug_info(&mut self) { + self.dfg.collect_debug_info(); + } + + /// Rewrite the branch destination to `new_dest` if the destination matches `old_dest`. + /// Does nothing if called with a non-jump or non-branch instruction. + pub fn rewrite_branch_destination(&mut self, inst: Inst, old_dest: Block, new_dest: Block) { + for dest in self.dfg.insts[inst].branch_destination_mut(&mut self.dfg.jump_tables) { + if dest.block(&self.dfg.value_lists) == old_dest { + dest.set_block(new_dest, &mut self.dfg.value_lists) + } + } + } + + /// Checks that the specified block can be encoded as a basic block. + /// + /// On error, returns the first invalid instruction and an error message. + pub fn is_block_basic(&self, block: Block) -> Result<(), (Inst, &'static str)> { + let dfg = &self.dfg; + let inst_iter = self.layout.block_insts(block); + + // Ignore all instructions prior to the first branch. 
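+        // In a basic block the first branch must also be the final
+        // instruction; the check below reports anything found after it as a
+        // "post-terminator instruction".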
+ let mut inst_iter = inst_iter.skip_while(|&inst| !dfg.insts[inst].opcode().is_branch()); + + if let Some(_branch) = inst_iter.next() { + if let Some(next) = inst_iter.next() { + return Err((next, "post-terminator instruction")); + } + } + + Ok(()) + } + + /// Returns an iterator over the blocks succeeding the given block. + pub fn block_successors(&self, block: Block) -> impl DoubleEndedIterator + '_ { + self.layout.last_inst(block).into_iter().flat_map(|inst| { + self.dfg.insts[inst] + .branch_destination(&self.dfg.jump_tables) + .iter() + .map(|block| block.block(&self.dfg.value_lists)) + }) + } + + /// Returns true if the function is function that doesn't call any other functions. This is not + /// to be confused with a "leaf function" in Windows terminology. + pub fn is_leaf(&self) -> bool { + // Conservative result: if there's at least one function signature referenced in this + // function, assume it is not a leaf. + let has_signatures = !self.dfg.signatures.is_empty(); + + // Under some TLS models, retrieving the address of a TLS variable requires calling a + // function. Conservatively assume that any function that references a tls global value + // is not a leaf. + let has_tls = self.global_values.values().any(|gv| match gv { + GlobalValueData::Symbol { tls, .. } => *tls, + _ => false, + }); + + !has_signatures && !has_tls + } + + /// Replace the `dst` instruction's data with the `src` instruction's data + /// and then remove `src`. + /// + /// `src` and its result values should not be used at all, as any uses would + /// be left dangling after calling this method. + /// + /// `src` and `dst` must have the same number of resulting values, and + /// `src`'s i^th value must have the same type as `dst`'s i^th value. + pub fn transplant_inst(&mut self, dst: Inst, src: Inst) { + debug_assert_eq!( + self.dfg.inst_results(dst).len(), + self.dfg.inst_results(src).len() + ); + debug_assert!(self + .dfg + .inst_results(dst) + .iter() + .zip(self.dfg.inst_results(src)) + .all(|(a, b)| self.dfg.value_type(*a) == self.dfg.value_type(*b))); + + self.dfg.insts[dst] = self.dfg.insts[src]; + self.layout.remove_inst(src); + } + + /// Size occupied by all stack slots associated with this function. + /// + /// Does not include any padding necessary due to offsets + pub fn fixed_stack_size(&self) -> u32 { + self.sized_stack_slots.values().map(|ss| ss.size).sum() + } + + /// Returns the list of relative source locations for this function. + pub(crate) fn rel_srclocs(&self) -> &SecondaryMap { + &self.srclocs + } +} + +/// Functions can be cloned, but it is not a very fast operation. +/// The clone will have all the same entity numbers as the original. +#[derive(Clone, PartialEq)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub struct Function { + /// Name of this function. + /// + /// Mostly used by `.clif` files, only there for debugging / naming purposes. + pub name: UserFuncName, + + /// All the fields required for compiling a function, independently of details irrelevant to + /// compilation and that are stored in the `FunctionParameters` `params` field instead. + pub stencil: FunctionStencil, + + /// All the parameters that can be applied onto the function stencil, that is, that don't + /// matter when caching compilation artifacts. 
+ pub params: FunctionParameters, +} + +impl core::ops::Deref for Function { + type Target = FunctionStencil; + + fn deref(&self) -> &Self::Target { + &self.stencil + } +} + +impl core::ops::DerefMut for Function { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.stencil + } +} + +impl Function { + /// Create a function with the given name and signature. + pub fn with_name_signature(name: UserFuncName, sig: Signature) -> Self { + Self { + name, + stencil: FunctionStencil { + version_marker: VersionMarker, + signature: sig, + sized_stack_slots: StackSlots::new(), + dynamic_stack_slots: DynamicStackSlots::new(), + global_values: PrimaryMap::new(), + global_value_facts: SecondaryMap::new(), + memory_types: PrimaryMap::new(), + dfg: DataFlowGraph::new(), + layout: Layout::new(), + srclocs: SecondaryMap::new(), + stack_limit: None, + }, + params: FunctionParameters::new(), + } + } + + /// Clear all data structures in this function. + pub fn clear(&mut self) { + self.stencil.clear(); + self.params.clear(); + self.name = UserFuncName::default(); + } + + /// Create a new empty, anonymous function with a Fast calling convention. + pub fn new() -> Self { + Self::with_name_signature(Default::default(), Signature::new(CallConv::Fast)) + } + + /// Return an object that can display this function with correct ISA-specific annotations. + pub fn display(&self) -> DisplayFunction<'_> { + DisplayFunction(self) + } + + /// Sets an absolute source location for the given instruction. + /// + /// If no base source location has been set yet, records it at the same time. + pub fn set_srcloc(&mut self, inst: Inst, srcloc: SourceLoc) { + let base = self.params.ensure_base_srcloc(srcloc); + self.stencil.srclocs[inst] = RelSourceLoc::from_base_offset(base, srcloc); + } + + /// Returns an absolute source location for the given instruction. + pub fn srcloc(&self, inst: Inst) -> SourceLoc { + let base = self.params.base_srcloc(); + self.stencil.srclocs[inst].expand(base) + } + + /// Declare a user-defined external function import, to be referenced in `ExtFuncData::User` later. + pub fn declare_imported_user_function( + &mut self, + name: UserExternalName, + ) -> UserExternalNameRef { + self.params.ensure_user_func_name(name) + } + + /// Declare an external function import. + pub fn import_function(&mut self, data: ExtFuncData) -> FuncRef { + self.stencil.dfg.ext_funcs.push(data) + } +} + +/// Wrapper type capable of displaying a `Function`. +pub struct DisplayFunction<'a>(&'a Function); + +impl<'a> fmt::Display for DisplayFunction<'a> { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + write_function(fmt, self.0) + } +} + +impl fmt::Display for Function { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + write_function(fmt, self) + } +} + +impl fmt::Debug for Function { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + write_function(fmt, self) + } +} diff --git a/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/globalvalue.rs b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/globalvalue.rs new file mode 100644 index 000000000..89120c8b8 --- /dev/null +++ b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/globalvalue.rs @@ -0,0 +1,147 @@ +//! Global values. 
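Illustrative aside (not part of the vendored diff): a minimal sketch of the `Function` construction flow defined in `src/ir/function.rs` above, showing a signature, an imported callee, and the user-name plumbing. The `cranelift_codegen::...` import paths are assumed public re-exports of the items in this diff.

```rust
use cranelift_codegen::ir::types::I32;
use cranelift_codegen::ir::{
    AbiParam, ExtFuncData, ExternalName, Function, Signature, UserExternalName, UserFuncName,
};
use cranelift_codegen::isa::CallConv;

fn build_example_function() -> Function {
    // A signature taking one i32 and returning one i32, fast calling convention.
    let mut sig = Signature::new(CallConv::Fast);
    sig.params.push(AbiParam::new(I32));
    sig.returns.push(AbiParam::new(I32));

    // The function itself, named in the user namespace (namespace 0, index 0).
    let mut func = Function::with_name_signature(UserFuncName::user(0, 0), sig);

    // Declare an external callee: its signature, a user-defined name, and the
    // `ExtFuncData` tying the two together.
    let callee_sig = func.import_signature(Signature::new(CallConv::Fast));
    let name_ref = func.declare_imported_user_function(UserExternalName::new(0, 1));
    func.import_function(ExtFuncData {
        name: ExternalName::user(name_ref),
        signature: callee_sig,
        colocated: false,
    });

    func
}
```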
+ +use crate::ir::immediates::{Imm64, Offset32}; +use crate::ir::{ExternalName, GlobalValue, MemFlags, Type}; +use crate::isa::TargetIsa; +use core::fmt; + +#[cfg(feature = "enable-serde")] +use serde_derive::{Deserialize, Serialize}; + +/// Information about a global value declaration. +#[derive(Debug, Clone, PartialEq, Hash)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub enum GlobalValueData { + /// Value is the address of the VM context struct. + VMContext, + + /// Value is pointed to by another global value. + /// + /// The `base` global value is assumed to contain a pointer. This global value is computed + /// by loading from memory at that pointer value. The memory must be accessible, and + /// naturally aligned to hold a value of the type. The data at this address is assumed + /// to never change while the current function is executing. + Load { + /// The base pointer global value. + base: GlobalValue, + + /// Offset added to the base pointer before doing the load. + offset: Offset32, + + /// Type of the loaded value. + global_type: Type, + + /// Specifies the memory flags to be used by the load. Guaranteed to be notrap and aligned. + flags: MemFlags, + }, + + /// Value is an offset from another global value. + IAddImm { + /// The base pointer global value. + base: GlobalValue, + + /// Byte offset to be added to the value. + offset: Imm64, + + /// Type of the iadd. + global_type: Type, + }, + + /// Value is symbolic, meaning it's a name which will be resolved to an + /// actual value later (eg. by linking). Cranelift itself does not interpret + /// this name; it's used by embedders to link with other data structures. + /// + /// For now, symbolic values always have pointer type, and represent + /// addresses, however in the future they could be used to represent other + /// things as well. + Symbol { + /// The symbolic name. + name: ExternalName, + + /// Offset from the symbol. This can be used instead of IAddImm to represent folding an + /// offset into a symbol. + offset: Imm64, + + /// Will this symbol be defined nearby, such that it will always be a certain distance + /// away, after linking? If so, references to it can avoid going through a GOT. Note that + /// symbols meant to be preemptible cannot be colocated. + /// + /// If `true`, some backends may use relocation forms that have limited range: for example, + /// a +/- 2^27-byte range on AArch64. See the documentation for + /// `RelocDistance` for more details. + colocated: bool, + + /// Does this symbol refer to a thread local storage value? + tls: bool, + }, + + /// Value is a multiple of how many instances of `vector_type` will fit in + /// a target vector register. + DynScaleTargetConst { + /// Base vector type. + vector_type: Type, + }, +} + +impl GlobalValueData { + /// Assume that `self` is an `GlobalValueData::Symbol` and return its name. + pub fn symbol_name(&self) -> &ExternalName { + match *self { + Self::Symbol { ref name, .. } => name, + _ => panic!("only symbols have names"), + } + } + + /// Return the type of this global. + pub fn global_type(&self, isa: &dyn TargetIsa) -> Type { + match *self { + Self::VMContext { .. } | Self::Symbol { .. } => isa.pointer_type(), + Self::IAddImm { global_type, .. } | Self::Load { global_type, .. } => global_type, + Self::DynScaleTargetConst { .. 
} => isa.pointer_type(), + } + } +} + +impl fmt::Display for GlobalValueData { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match *self { + Self::VMContext => write!(f, "vmctx"), + Self::Load { + base, + offset, + global_type, + flags, + } => write!(f, "load.{global_type}{flags} {base}{offset}"), + Self::IAddImm { + global_type, + base, + offset, + } => write!(f, "iadd_imm.{global_type} {base}, {offset}"), + Self::Symbol { + ref name, + offset, + colocated, + tls, + } => { + write!( + f, + "symbol {}{}{}", + if colocated { "colocated " } else { "" }, + if tls { "tls " } else { "" }, + name.display(None) + )?; + let offset_val: i64 = offset.into(); + if offset_val > 0 { + write!(f, "+")?; + } + if offset_val != 0 { + write!(f, "{offset}")?; + } + Ok(()) + } + Self::DynScaleTargetConst { vector_type } => { + write!(f, "dyn_scale_target_const.{vector_type}") + } + } + } +} diff --git a/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/immediates.rs b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/immediates.rs new file mode 100644 index 000000000..82ca114be --- /dev/null +++ b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/immediates.rs @@ -0,0 +1,1961 @@ +//! Immediate operands for Cranelift instructions +//! +//! This module defines the types of immediate operands that can appear on Cranelift instructions. +//! Each type here should have a corresponding definition in the +//! `cranelift-codegen/meta/src/shared/immediates` crate in the meta language. + +use alloc::vec::Vec; +use core::cmp::Ordering; +use core::fmt::{self, Display, Formatter}; +use core::ops::{Add, BitAnd, BitOr, BitXor, Div, Mul, Neg, Not, Sub}; +use core::str::FromStr; +use core::{i32, u32}; +use cranelift_entity::{Signed, Unsigned}; +#[cfg(feature = "enable-serde")] +use serde_derive::{Deserialize, Serialize}; + +/// Convert a type into a vector of bytes; all implementors in this file must use little-endian +/// orderings of bytes to match WebAssembly's little-endianness. +pub trait IntoBytes { + /// Return the little-endian byte representation of the implementing type. + fn into_bytes(self) -> Vec; +} + +impl IntoBytes for u8 { + fn into_bytes(self) -> Vec { + vec![self] + } +} + +impl IntoBytes for i8 { + fn into_bytes(self) -> Vec { + vec![self as u8] + } +} + +impl IntoBytes for i16 { + fn into_bytes(self) -> Vec { + self.to_le_bytes().to_vec() + } +} + +impl IntoBytes for i32 { + fn into_bytes(self) -> Vec { + self.to_le_bytes().to_vec() + } +} + +impl IntoBytes for Vec { + fn into_bytes(self) -> Vec { + self + } +} + +/// 64-bit immediate signed integer operand. +/// +/// An `Imm64` operand can also be used to represent immediate values of smaller integer types by +/// sign-extending to `i64`. +#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub struct Imm64(i64); + +impl Imm64 { + /// Create a new `Imm64` representing the signed number `x`. + pub fn new(x: i64) -> Self { + Self(x) + } + + /// Return self negated. + pub fn wrapping_neg(self) -> Self { + Self(self.0.wrapping_neg()) + } + + /// Returns the value of this immediate. + pub fn bits(&self) -> i64 { + self.0 + } + + /// Mask this immediate to the given power-of-two bit width. 
+ #[must_use] + pub(crate) fn mask_to_width(&self, bit_width: u32) -> Self { + debug_assert!(bit_width.is_power_of_two()); + + if bit_width >= 64 { + return *self; + } + + let bit_width = i64::from(bit_width); + let mask = (1 << bit_width) - 1; + let masked = self.0 & mask; + Imm64(masked) + } + + /// Sign extend this immediate as if it were a signed integer of the given + /// power-of-two width. + #[must_use] + pub fn sign_extend_from_width(&self, bit_width: u32) -> Self { + debug_assert!( + bit_width.is_power_of_two(), + "{bit_width} is not a power of two" + ); + + if bit_width >= 64 { + return *self; + } + + let bit_width = i64::from(bit_width); + let delta = 64 - bit_width; + let sign_extended = (self.0 << delta) >> delta; + Imm64(sign_extended) + } + + /// Zero extend this immediate as if it were an unsigned integer of the + /// given power-of-two width. + #[must_use] + pub fn zero_extend_from_width(&self, bit_width: u32) -> Self { + debug_assert!( + bit_width.is_power_of_two(), + "{bit_width} is not a power of two" + ); + + if bit_width >= 64 { + return *self; + } + + let bit_width = u64::from(bit_width); + let delta = 64 - bit_width; + let zero_extended = (self.0.unsigned() << delta) >> delta; + Imm64(zero_extended.signed()) + } +} + +impl From for i64 { + fn from(val: Imm64) -> i64 { + val.0 + } +} + +impl IntoBytes for Imm64 { + fn into_bytes(self) -> Vec { + self.0.to_le_bytes().to_vec() + } +} + +impl From for Imm64 { + fn from(x: i64) -> Self { + Self(x) + } +} + +impl Display for Imm64 { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + let x = self.0; + if x < 10_000 { + // Use decimal for small and negative numbers. + write!(f, "{x}") + } else { + write_hex(x as u64, f) + } + } +} + +/// Parse a 64-bit signed number. +fn parse_i64(s: &str) -> Result { + let negative = s.starts_with('-'); + let s2 = if negative || s.starts_with('+') { + &s[1..] + } else { + s + }; + + let mut value = parse_u64(s2)?; + + // We support the range-and-a-half from -2^63 .. 2^64-1. + if negative { + value = value.wrapping_neg(); + // Don't allow large negative values to wrap around and become positive. + if value as i64 > 0 { + return Err("Negative number too small"); + } + } + Ok(value as i64) +} + +impl FromStr for Imm64 { + type Err = &'static str; + + // Parse a decimal or hexadecimal `Imm64`, formatted as above. + fn from_str(s: &str) -> Result { + parse_i64(s).map(Self::new) + } +} + +/// 64-bit immediate unsigned integer operand. +/// +/// A `Uimm64` operand can also be used to represent immediate values of smaller integer types by +/// zero-extending to `i64`. +#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub struct Uimm64(u64); + +impl Uimm64 { + /// Create a new `Uimm64` representing the unsigned number `x`. + pub fn new(x: u64) -> Self { + Self(x) + } + + /// Return self negated. 
+ pub fn wrapping_neg(self) -> Self { + Self(self.0.wrapping_neg()) + } +} + +impl From for u64 { + fn from(val: Uimm64) -> u64 { + val.0 + } +} + +impl From for Uimm64 { + fn from(x: u64) -> Self { + Self(x) + } +} + +/// Hexadecimal with a multiple of 4 digits and group separators: +/// +/// 0xfff0 +/// 0x0001_ffff +/// 0xffff_ffff_fff8_4400 +/// +fn write_hex(x: u64, f: &mut Formatter) -> fmt::Result { + let mut pos = (64 - x.leading_zeros() - 1) & 0xf0; + write!(f, "0x{:04x}", (x >> pos) & 0xffff)?; + while pos > 0 { + pos -= 16; + write!(f, "_{:04x}", (x >> pos) & 0xffff)?; + } + Ok(()) +} + +impl Display for Uimm64 { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + let x = self.0; + if x < 10_000 { + // Use decimal for small numbers. + write!(f, "{x}") + } else { + write_hex(x, f) + } + } +} + +/// Parse a 64-bit unsigned number. +fn parse_u64(s: &str) -> Result { + let mut value: u64 = 0; + let mut digits = 0; + + if s.starts_with("-0x") { + return Err("Invalid character in hexadecimal number"); + } else if let Some(num) = s.strip_prefix("0x") { + // Hexadecimal. + for ch in num.chars() { + match ch.to_digit(16) { + Some(digit) => { + digits += 1; + if digits > 16 { + return Err("Too many hexadecimal digits"); + } + // This can't overflow given the digit limit. + value = (value << 4) | u64::from(digit); + } + None => { + // Allow embedded underscores, but fail on anything else. + if ch != '_' { + return Err("Invalid character in hexadecimal number"); + } + } + } + } + } else { + // Decimal number, possibly negative. + for ch in s.chars() { + match ch.to_digit(10) { + Some(digit) => { + digits += 1; + match value.checked_mul(10) { + None => return Err("Too large decimal number"), + Some(v) => value = v, + } + match value.checked_add(u64::from(digit)) { + None => return Err("Too large decimal number"), + Some(v) => value = v, + } + } + None => { + // Allow embedded underscores, but fail on anything else. + if ch != '_' { + return Err("Invalid character in decimal number"); + } + } + } + } + } + + if digits == 0 { + return Err("No digits in number"); + } + + Ok(value) +} + +impl FromStr for Uimm64 { + type Err = &'static str; + + // Parse a decimal or hexadecimal `Uimm64`, formatted as above. + fn from_str(s: &str) -> Result { + parse_u64(s).map(Self::new) + } +} + +/// 8-bit unsigned integer immediate operand. +/// +/// This is used to indicate lane indexes typically. +pub type Uimm8 = u8; + +/// A 32-bit unsigned integer immediate operand. +/// +/// This is used to represent sizes of memory objects. +#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub struct Uimm32(u32); + +impl From for u32 { + fn from(val: Uimm32) -> u32 { + val.0 + } +} + +impl From for u64 { + fn from(val: Uimm32) -> u64 { + val.0.into() + } +} + +impl From for i64 { + fn from(val: Uimm32) -> i64 { + i64::from(val.0) + } +} + +impl From for Uimm32 { + fn from(x: u32) -> Self { + Self(x) + } +} + +impl Display for Uimm32 { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + if self.0 < 10_000 { + write!(f, "{}", self.0) + } else { + write_hex(u64::from(self.0), f) + } + } +} + +impl FromStr for Uimm32 { + type Err = &'static str; + + // Parse a decimal or hexadecimal `Uimm32`, formatted as above. + fn from_str(s: &str) -> Result { + parse_i64(s).and_then(|x| { + if 0 <= x && x <= i64::from(u32::MAX) { + Ok(Self(x as u32)) + } else { + Err("Uimm32 out of range") + } + }) + } +} + +/// A 128-bit immediate operand. 
+/// +/// This is used as an immediate value in SIMD instructions. +#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub struct V128Imm(pub [u8; 16]); + +impl V128Imm { + /// Iterate over the bytes in the constant. + pub fn bytes(&self) -> impl Iterator { + self.0.iter() + } + + /// Convert the immediate into a vector. + pub fn to_vec(self) -> Vec { + self.0.to_vec() + } + + /// Convert the immediate into a slice. + pub fn as_slice(&self) -> &[u8] { + &self.0[..] + } +} + +impl From<&[u8]> for V128Imm { + fn from(slice: &[u8]) -> Self { + assert_eq!(slice.len(), 16); + let mut buffer = [0; 16]; + buffer.copy_from_slice(slice); + Self(buffer) + } +} + +impl From for V128Imm { + fn from(val: u128) -> Self { + V128Imm(val.to_le_bytes()) + } +} + +/// 32-bit signed immediate offset. +/// +/// This is used to encode an immediate offset for load/store instructions. All supported ISAs have +/// a maximum load/store offset that fits in an `i32`. +#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub struct Offset32(i32); + +impl Offset32 { + /// Create a new `Offset32` representing the signed number `x`. + pub fn new(x: i32) -> Self { + Self(x) + } + + /// Create a new `Offset32` representing the signed number `x` if possible. + pub fn try_from_i64(x: i64) -> Option { + let x = i32::try_from(x).ok()?; + Some(Self::new(x)) + } + + /// Add in the signed number `x` if possible. + pub fn try_add_i64(self, x: i64) -> Option { + let x = i32::try_from(x).ok()?; + let ret = self.0.checked_add(x)?; + Some(Self::new(ret)) + } +} + +impl From for i32 { + fn from(val: Offset32) -> i32 { + val.0 + } +} + +impl From for i64 { + fn from(val: Offset32) -> i64 { + i64::from(val.0) + } +} + +impl From for Offset32 { + fn from(x: i32) -> Self { + Self(x) + } +} + +impl From for Offset32 { + fn from(val: u8) -> Offset32 { + Self(val.into()) + } +} + +impl Display for Offset32 { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + // 0 displays as an empty offset. + if self.0 == 0 { + return Ok(()); + } + + // Always include a sign. + write!(f, "{}", if self.0 < 0 { '-' } else { '+' })?; + + let val = i64::from(self.0).abs(); + if val < 10_000 { + write!(f, "{val}") + } else { + write_hex(val as u64, f) + } + } +} + +impl FromStr for Offset32 { + type Err = &'static str; + + // Parse a decimal or hexadecimal `Offset32`, formatted as above. + fn from_str(s: &str) -> Result { + if !(s.starts_with('-') || s.starts_with('+')) { + return Err("Offset must begin with sign"); + } + parse_i64(s).and_then(|x| { + if i64::from(i32::MIN) <= x && x <= i64::from(i32::MAX) { + Ok(Self::new(x as i32)) + } else { + Err("Offset out of range") + } + }) + } +} + +// FIXME(rust-lang/rust#83527): Replace with `${ignore()}` once it is stabilised. +macro_rules! ignore { + ($($t:tt)*) => {}; +} + +macro_rules! ieee_float { + ( + name = $name:ident, + bits = $bits:literal, + significand_bits = $significand_bits:literal, + bits_ty = $bits_ty:ident, + float_ty = $float_ty:ident, + $(as_float = $as_float:ident,)? + $(rust_type_not_stable = $rust_type_not_stable:ident,)? + ) => { + /// An IEEE + #[doc = concat!("binary", stringify!($bits))] + /// immediate floating point value, represented as a + #[doc = stringify!($bits_ty)] + /// containing the bit pattern. + /// + /// We specifically avoid using a + #[doc = stringify!($float_ty)] + /// here since some architectures may silently alter floats. 
+ /// See: + /// + /// The [PartialEq] and [Hash] implementations are over the underlying bit pattern, but + /// [PartialOrd] respects IEEE754 semantics. + /// + /// All bit patterns are allowed. + #[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)] + #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] + #[repr(C)] + pub struct $name { + bits: $bits_ty + } + + impl $name { + const BITS: u8 = $bits; + const SIGNIFICAND_BITS: u8 = $significand_bits; + const EXPONENT_BITS: u8 = Self::BITS - Self::SIGNIFICAND_BITS - 1; + const SIGN_MASK: $bits_ty = 1 << (Self::EXPONENT_BITS + Self::SIGNIFICAND_BITS); + const SIGNIFICAND_MASK: $bits_ty = $bits_ty::MAX >> (Self::EXPONENT_BITS + 1); + const EXPONENT_MASK: $bits_ty = !Self::SIGN_MASK & !Self::SIGNIFICAND_MASK; + /// The positive WebAssembly canonical NaN. + pub const NAN: Self = Self::with_bits(Self::EXPONENT_MASK | (1 << (Self::SIGNIFICAND_BITS - 1))); + + /// Create a new + #[doc = concat!("`", stringify!($name), "`")] + /// containing the bits of `bits`. + pub const fn with_bits(bits: $bits_ty) -> Self { + Self { bits } + } + + /// Get the bitwise representation. + pub fn bits(self) -> $bits_ty { + self.bits + } + + $( + /// Create a new + #[doc = concat!("`", stringify!($name), "`")] + /// representing the number `x`. + pub fn with_float(x: $float_ty) -> Self { + Self::with_bits(x.to_bits()) + } + + /// Converts `self` to a Rust + #[doc = concat!("`", stringify!($float_ty), "`.")] + pub fn $as_float(self) -> $float_ty { + $float_ty::from_bits(self.bits()) + } + )? + + /// Computes the absolute value of `self`. + pub fn abs(self) -> Self { + Self::with_bits(self.bits() & !Self::SIGN_MASK) + } + + /// Returns a number composed of the magnitude of `self` and the sign of `sign`. + pub fn copysign(self, sign: Self) -> Self { + Self::with_bits((self.bits() & !Self::SIGN_MASK) | (sign.bits() & Self::SIGN_MASK)) + } + + /// Returns the minimum of `self` and `other`, following the WebAssembly/IEEE 754-2019 definition. + pub fn minimum(self, other: Self) -> Self { + // FIXME: Replace with Rust float method once it is stabilised. + if self.is_nan() || other.is_nan() { + Self::NAN + } else if self.is_zero() && other.is_zero() { + if self.is_negative() { + self + } else { + other + } + } else if self <= other { + self + } else { + other + } + } + + /// Returns the maximum of `self` and `other`, following the WebAssembly/IEEE 754-2019 definition. + pub fn maximum(self, other: Self) -> Self { + // FIXME: Replace with Rust float method once it is stabilised. + if self.is_nan() || other.is_nan() { + Self::NAN + } else if self.is_zero() && other.is_zero() { + if self.is_positive() { + self + } else { + other + } + } else if self >= other { + self + } else { + other + } + } + + /// Create an + #[doc = concat!("`", stringify!($name), "`")] + /// number representing `2.0^n`. + pub fn pow2>(n: I) -> Self { + let n = n.into(); + let w = Self::EXPONENT_BITS; + let t = Self::SIGNIFICAND_BITS; + let bias = (1 << (w - 1)) - 1; + let exponent = n + bias; + assert!(exponent > 0, "Underflow n={}", n); + assert!(exponent < (1 << w) + 1, "Overflow n={}", n); + Self::with_bits((exponent as $bits_ty) << t) + } + + /// Create an + #[doc = concat!("`", stringify!($name), "`")] + /// number representing the greatest negative value not convertible from + #[doc = concat!("`", stringify!($float_ty), "`")] + /// to a signed integer with width n. 
+ pub fn fcvt_to_sint_negative_overflow>(n: I) -> Self { + let n = n.into(); + debug_assert!(n < i32::from(Self::BITS)); + debug_assert!(i32::from(Self::SIGNIFICAND_BITS) + 1 - n < i32::from(Self::BITS)); + Self::with_bits((1 << (Self::BITS - 1)) | Self::pow2(n - 1).bits() | (1 << (i32::from(Self::SIGNIFICAND_BITS) + 1 - n))) + } + + /// Check if the value is a NaN. For + #[doc = concat!("`", stringify!($name), "`,")] + /// this means checking that all the exponent bits are set and the significand is non-zero. + pub fn is_nan(self) -> bool { + self.abs().bits() > Self::EXPONENT_MASK + } + + /// Returns true if `self` has a negative sign, including 0.0, NaNs with positive sign bit and positive infinity. + pub fn is_positive(self) -> bool { + !self.is_negative() + } + + /// Returns true if `self` has a negative sign, including -0.0, NaNs with negative sign bit and negative infinity. + pub fn is_negative(self) -> bool { + self.bits() & Self::SIGN_MASK == Self::SIGN_MASK + } + + /// Returns `true` if `self` is positive or negative zero. + pub fn is_zero(self) -> bool { + self.abs().bits() == 0 + } + + /// Returns `None` if `self` is a NaN and `Some(self)` otherwise. + pub fn non_nan(self) -> Option { + Some(self).filter(|f| !f.is_nan()) + } + + $( + /// Returns the square root of `self`. + pub fn sqrt(self) -> Self { + Self::with_float(self.$as_float().sqrt()) + } + + /// Returns the smallest integer greater than or equal to `self`. + pub fn ceil(self) -> Self { + Self::with_float(self.$as_float().ceil()) + } + + /// Returns the largest integer less than or equal to `self`. + pub fn floor(self) -> Self { + Self::with_float(self.$as_float().floor()) + } + + /// Returns the integer part of `self`. This means that non-integer numbers are always truncated towards zero. + pub fn trunc(self) -> Self { + Self::with_float(self.$as_float().trunc()) + } + + /// Returns the nearest integer to `self`. Rounds half-way cases to the number + /// with an even least significant digit. + pub fn round_ties_even(self) -> Self { + Self::with_float(self.$as_float().round_ties_even()) + } + )? + } + + impl PartialOrd for $name { + fn partial_cmp(&self, rhs: &Self) -> Option { + $(self.$as_float().partial_cmp(&rhs.$as_float()))? + $( + ignore!($rust_type_not_stable); + // FIXME(#8312): Use builtin Rust comparisons once `f16` and `f128` support is stabalised. + if self.is_nan() || rhs.is_nan() { + // One of the floats is a NaN. + return None; + } + if self.is_zero() || rhs.is_zero() { + // Zeros are always equal regardless of sign. + return Some(Ordering::Equal); + } + let lhs_positive = self.is_positive(); + let rhs_positive = rhs.is_positive(); + if lhs_positive != rhs_positive { + // Different signs: negative < positive + return lhs_positive.partial_cmp(&rhs_positive); + } + // Finite or infinity will order correctly with an integer comparison of the bits. + if lhs_positive { + self.bits().partial_cmp(&rhs.bits()) + } else { + // Reverse the comparison when both floats are negative. + rhs.bits().partial_cmp(&self.bits()) + } + )? 
+ } + } + + impl Display for $name { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + format_float(u128::from(self.bits()), Self::EXPONENT_BITS, Self::SIGNIFICAND_BITS, f) + } + } + + impl FromStr for $name { + type Err = &'static str; + + fn from_str(s: &str) -> Result { + match parse_float(s, Self::EXPONENT_BITS, Self::SIGNIFICAND_BITS) { + Ok(b) => Ok(Self::with_bits(b.try_into().unwrap())), + Err(s) => Err(s), + } + } + } + + impl IntoBytes for $name { + fn into_bytes(self) -> Vec { + self.bits().to_le_bytes().to_vec() + } + } + + impl Neg for $name { + type Output = Self; + + fn neg(self) -> Self { + Self::with_bits(self.bits() ^ Self::SIGN_MASK) + } + } + + + + $( + impl From<$float_ty> for $name { + fn from(x: $float_ty) -> Self { + Self::with_float(x) + } + } + + impl Add for $name { + type Output = Self; + + fn add(self, rhs: Self) -> Self { + Self::with_float(self.$as_float() + rhs.$as_float()) + } + } + + impl Sub for $name { + type Output = Self; + + fn sub(self, rhs: Self) -> Self { + Self::with_float(self.$as_float() - rhs.$as_float()) + } + } + + impl Mul for $name { + type Output = Self; + + fn mul(self, rhs: Self) -> Self { + Self::with_float(self.$as_float() * rhs.$as_float()) + } + } + + impl Div for $name { + type Output = Self; + + fn div(self, rhs: Self) -> Self::Output { + Self::with_float(self.$as_float() / rhs.$as_float()) + } + } + )? + + impl BitAnd for $name { + type Output = Self; + + fn bitand(self, rhs: Self) -> Self { + Self::with_bits(self.bits() & rhs.bits()) + } + } + + impl BitOr for $name { + type Output = Self; + + fn bitor(self, rhs: Self) -> Self { + Self::with_bits(self.bits() | rhs.bits()) + } + } + + impl BitXor for $name { + type Output = Self; + + fn bitxor(self, rhs: Self) -> Self { + Self::with_bits(self.bits() ^ rhs.bits()) + } + } + + impl Not for $name { + type Output = Self; + + fn not(self) -> Self { + Self::with_bits(!self.bits()) + } + } + }; +} + +ieee_float! { + name = Ieee16, + bits = 16, + significand_bits = 10, + bits_ty = u16, + float_ty = f16, + rust_type_not_stable = rust_type_not_stable, +} + +ieee_float! { + name = Ieee32, + bits = 32, + significand_bits = 23, + bits_ty = u32, + float_ty = f32, + as_float = as_f32, +} + +ieee_float! { + name = Ieee64, + bits = 64, + significand_bits = 52, + bits_ty = u64, + float_ty = f64, + as_float = as_f64, +} + +ieee_float! { + name = Ieee128, + bits = 128, + significand_bits = 112, + bits_ty = u128, + float_ty = f128, + rust_type_not_stable = rust_type_not_stable, +} + +/// Format a floating point number in a way that is reasonably human-readable, and that can be +/// converted back to binary without any rounding issues. The hexadecimal formatting of normal and +/// subnormal numbers is compatible with C99 and the `printf "%a"` format specifier. The NaN and Inf +/// formats are not supported by C99. +/// +/// The encoding parameters are: +/// +/// w - exponent field width in bits +/// t - trailing significand field width in bits +/// +fn format_float(bits: u128, w: u8, t: u8, f: &mut Formatter) -> fmt::Result { + debug_assert!(w > 0 && w <= 16, "Invalid exponent range"); + debug_assert!(1 + w + t <= 128, "Too large IEEE format for u128"); + debug_assert!((t + w + 1).is_power_of_two(), "Unexpected IEEE format size"); + + let max_e_bits = (1u128 << w) - 1; + let t_bits = bits & ((1u128 << t) - 1); // Trailing significand. + let e_bits = (bits >> t) & max_e_bits; // Biased exponent. 
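+    // Worked example (f32, w = 8, t = 23): 1.0f32 has bits 0x3f80_0000, so
+    // t_bits = 0 and e_bits = 127 (bias 127, unbiased exponent 0), with the
+    // sign bit clear; the branches below print it as "0x1.000000p0".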
+ let sign_bit = (bits >> (w + t)) & 1; + + let bias: i32 = (1 << (w - 1)) - 1; + let e = e_bits as i32 - bias; // Unbiased exponent. + let emin = 1 - bias; // Minimum exponent. + + // How many hexadecimal digits are needed for the trailing significand? + let digits = (t + 3) / 4; + // Trailing significand left-aligned in `digits` hexadecimal digits. + let left_t_bits = t_bits << (4 * digits - t); + + // All formats share the leading sign. + if sign_bit != 0 { + write!(f, "-")?; + } + + if e_bits == 0 { + if t_bits == 0 { + // Zero. + write!(f, "0.0") + } else { + // Subnormal. + write!( + f, + "0x0.{0:01$x}p{2}", + left_t_bits, + usize::from(digits), + emin + ) + } + } else if e_bits == max_e_bits { + // Always print a `+` or `-` sign for these special values. + // This makes them easier to parse as they can't be confused as identifiers. + if sign_bit == 0 { + write!(f, "+")?; + } + if t_bits == 0 { + // Infinity. + write!(f, "Inf") + } else { + // NaN. + let payload = t_bits & ((1 << (t - 1)) - 1); + if t_bits & (1 << (t - 1)) != 0 { + // Quiet NaN. + if payload != 0 { + write!(f, "NaN:0x{payload:x}") + } else { + write!(f, "NaN") + } + } else { + // Signaling NaN. + write!(f, "sNaN:0x{payload:x}") + } + } + } else { + // Normal number. + write!(f, "0x1.{0:01$x}p{2}", left_t_bits, usize::from(digits), e) + } +} + +/// Parse a float using the same format as `format_float` above. +/// +/// The encoding parameters are: +/// +/// w - exponent field width in bits +/// t - trailing significand field width in bits +/// +fn parse_float(s: &str, w: u8, t: u8) -> Result { + debug_assert!(w > 0 && w <= 16, "Invalid exponent range"); + debug_assert!(1 + w + t <= 128, "Too large IEEE format for u128"); + debug_assert!((t + w + 1).is_power_of_two(), "Unexpected IEEE format size"); + + let (sign_bit, s2) = if let Some(num) = s.strip_prefix('-') { + (1u128 << (t + w), num) + } else if let Some(num) = s.strip_prefix('+') { + (0, num) + } else { + (0, s) + }; + + if !s2.starts_with("0x") { + let max_e_bits = ((1u128 << w) - 1) << t; + let quiet_bit = 1u128 << (t - 1); + + // The only decimal encoding allowed is 0. + if s2 == "0.0" { + return Ok(sign_bit); + } + + if s2 == "Inf" { + // +/- infinity: e = max, t = 0. + return Ok(sign_bit | max_e_bits); + } + if s2 == "NaN" { + // Canonical quiet NaN: e = max, t = quiet. + return Ok(sign_bit | max_e_bits | quiet_bit); + } + if let Some(nan) = s2.strip_prefix("NaN:0x") { + // Quiet NaN with payload. + return match u128::from_str_radix(nan, 16) { + Ok(payload) if payload < quiet_bit => { + Ok(sign_bit | max_e_bits | quiet_bit | payload) + } + _ => Err("Invalid NaN payload"), + }; + } + if let Some(nan) = s2.strip_prefix("sNaN:0x") { + // Signaling NaN with payload. + return match u128::from_str_radix(nan, 16) { + Ok(payload) if 0 < payload && payload < quiet_bit => { + Ok(sign_bit | max_e_bits | payload) + } + _ => Err("Invalid sNaN payload"), + }; + } + + return Err("Float must be hexadecimal"); + } + let s3 = &s2[2..]; + + let mut digits = 0u8; + let mut digits_before_period: Option = None; + let mut significand = 0u128; + let mut exponent = 0i32; + + for (idx, ch) in s3.char_indices() { + match ch { + '.' => { + // This is the radix point. There can only be one. + if digits_before_period != None { + return Err("Multiple radix points"); + } else { + digits_before_period = Some(digits); + } + } + 'p' => { + // The following exponent is a decimal number. 
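+                // `idx` is the byte offset of the 'p', so the slice below takes everything
+                // after it, e.g. the "-3" of "0x1.8p-3"; the exponent is parsed as a signed
+                // decimal integer, so an optional leading '+' or '-' is accepted.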
+ let exp_str = &s3[1 + idx..]; + match exp_str.parse::() { + Ok(e) => { + exponent = i32::from(e); + break; + } + Err(_) => return Err("Bad exponent"), + } + } + _ => match ch.to_digit(16) { + Some(digit) => { + digits += 1; + if digits > 32 { + return Err("Too many digits"); + } + significand = (significand << 4) | u128::from(digit); + } + None => return Err("Invalid character"), + }, + } + } + + if digits == 0 { + return Err("No digits"); + } + + if significand == 0 { + // This is +/- 0.0. + return Ok(sign_bit); + } + + // Number of bits appearing after the radix point. + match digits_before_period { + None => {} // No radix point present. + Some(d) => exponent -= 4 * i32::from(digits - d), + }; + + // Normalize the significand and exponent. + let significant_bits = (128 - significand.leading_zeros()) as u8; + if significant_bits > t + 1 { + let adjust = significant_bits - (t + 1); + if significand & ((1u128 << adjust) - 1) != 0 { + return Err("Too many significant bits"); + } + // Adjust significand down. + significand >>= adjust; + exponent += i32::from(adjust); + } else { + let adjust = t + 1 - significant_bits; + significand <<= adjust; + exponent -= i32::from(adjust); + } + debug_assert_eq!(significand >> t, 1); + + // Trailing significand excludes the high bit. + let t_bits = significand & ((1 << t) - 1); + + let max_exp = (1i32 << w) - 2; + let bias: i32 = (1 << (w - 1)) - 1; + exponent += bias + i32::from(t); + + if exponent > max_exp { + Err("Magnitude too large") + } else if exponent > 0 { + // This is a normal number. + let e_bits = (exponent as u128) << t; + Ok(sign_bit | e_bits | t_bits) + } else if 1 - exponent <= i32::from(t) { + // This is a subnormal number: e = 0, t = significand bits. + // Renormalize significand for exponent = 1. + let adjust = 1 - exponent; + if significand & ((1u128 << adjust) - 1) != 0 { + Err("Subnormal underflow") + } else { + significand >>= adjust; + Ok(sign_bit | significand) + } + } else { + Err("Magnitude too small") + } +} + +#[cfg(test)] +mod tests { + use super::*; + use alloc::string::ToString; + use core::{f32, f64}; + + #[test] + fn format_imm64() { + assert_eq!(Imm64(0).to_string(), "0"); + assert_eq!(Imm64(9999).to_string(), "9999"); + assert_eq!(Imm64(10000).to_string(), "0x2710"); + assert_eq!(Imm64(-9999).to_string(), "-9999"); + assert_eq!(Imm64(-10000).to_string(), "-10000"); + assert_eq!(Imm64(0xffff).to_string(), "0xffff"); + assert_eq!(Imm64(0x10000).to_string(), "0x0001_0000"); + } + + #[test] + fn format_uimm64() { + assert_eq!(Uimm64(0).to_string(), "0"); + assert_eq!(Uimm64(9999).to_string(), "9999"); + assert_eq!(Uimm64(10000).to_string(), "0x2710"); + assert_eq!(Uimm64(-9999i64 as u64).to_string(), "0xffff_ffff_ffff_d8f1"); + assert_eq!( + Uimm64(-10000i64 as u64).to_string(), + "0xffff_ffff_ffff_d8f0" + ); + assert_eq!(Uimm64(0xffff).to_string(), "0xffff"); + assert_eq!(Uimm64(0x10000).to_string(), "0x0001_0000"); + } + + // Verify that `text` can be parsed as a `T` into a value that displays as `want`. + #[track_caller] + fn parse_ok(text: &str, want: &str) + where + ::Err: Display, + { + match text.parse::() { + Err(s) => panic!("\"{text}\".parse() error: {s}"), + Ok(x) => assert_eq!(x.to_string(), want), + } + } + + // Verify that `text` fails to parse as `T` with the error `msg`. 
+ fn parse_err(text: &str, msg: &str) + where + ::Err: Display, + { + match text.parse::() { + Err(s) => assert_eq!(s.to_string(), msg), + Ok(x) => panic!("Wanted Err({msg}), but got {x}"), + } + } + + #[test] + fn parse_imm64() { + parse_ok::("0", "0"); + parse_ok::("1", "1"); + parse_ok::("-0", "0"); + parse_ok::("-1", "-1"); + parse_ok::("0x0", "0"); + parse_ok::("0xf", "15"); + parse_ok::("-0x9", "-9"); + + // Probe limits. + parse_ok::("0xffffffff_ffffffff", "-1"); + parse_ok::("0x80000000_00000000", "-9223372036854775808"); + parse_ok::("-0x80000000_00000000", "-9223372036854775808"); + parse_err::("-0x80000000_00000001", "Negative number too small"); + parse_ok::("18446744073709551615", "-1"); + parse_ok::("-9223372036854775808", "-9223372036854775808"); + // Overflow both the `checked_add` and `checked_mul`. + parse_err::("18446744073709551616", "Too large decimal number"); + parse_err::("184467440737095516100", "Too large decimal number"); + parse_err::("-9223372036854775809", "Negative number too small"); + + // Underscores are allowed where digits go. + parse_ok::("0_0", "0"); + parse_ok::("-_10_0", "-100"); + parse_ok::("_10_", "10"); + parse_ok::("0x97_88_bb", "0x0097_88bb"); + parse_ok::("0x_97_", "151"); + + parse_err::("", "No digits in number"); + parse_err::("-", "No digits in number"); + parse_err::("_", "No digits in number"); + parse_err::("0x", "No digits in number"); + parse_err::("0x_", "No digits in number"); + parse_err::("-0x", "No digits in number"); + parse_err::(" ", "Invalid character in decimal number"); + parse_err::("0 ", "Invalid character in decimal number"); + parse_err::(" 0", "Invalid character in decimal number"); + parse_err::("--", "Invalid character in decimal number"); + parse_err::("-0x-", "Invalid character in hexadecimal number"); + parse_err::("abc", "Invalid character in decimal number"); + parse_err::("-abc", "Invalid character in decimal number"); + + // Hex count overflow. + parse_err::("0x0_0000_0000_0000_0000", "Too many hexadecimal digits"); + } + + #[test] + fn parse_uimm64() { + parse_ok::("0", "0"); + parse_ok::("1", "1"); + parse_ok::("0x0", "0"); + parse_ok::("0xf", "15"); + parse_ok::("0xffffffff_fffffff7", "0xffff_ffff_ffff_fff7"); + + // Probe limits. + parse_ok::("0xffffffff_ffffffff", "0xffff_ffff_ffff_ffff"); + parse_ok::("0x80000000_00000000", "0x8000_0000_0000_0000"); + parse_ok::("18446744073709551615", "0xffff_ffff_ffff_ffff"); + // Overflow both the `checked_add` and `checked_mul`. + parse_err::("18446744073709551616", "Too large decimal number"); + parse_err::("184467440737095516100", "Too large decimal number"); + + // Underscores are allowed where digits go. 
+ parse_ok::("0_0", "0"); + parse_ok::("_10_", "10"); + parse_ok::("0x97_88_bb", "0x0097_88bb"); + parse_ok::("0x_97_", "151"); + + parse_err::("", "No digits in number"); + parse_err::("_", "No digits in number"); + parse_err::("0x", "No digits in number"); + parse_err::("0x_", "No digits in number"); + parse_err::("-", "Invalid character in decimal number"); + parse_err::("-0x", "Invalid character in hexadecimal number"); + parse_err::(" ", "Invalid character in decimal number"); + parse_err::("0 ", "Invalid character in decimal number"); + parse_err::(" 0", "Invalid character in decimal number"); + parse_err::("--", "Invalid character in decimal number"); + parse_err::("-0x-", "Invalid character in hexadecimal number"); + parse_err::("-0", "Invalid character in decimal number"); + parse_err::("-1", "Invalid character in decimal number"); + parse_err::("abc", "Invalid character in decimal number"); + parse_err::("-abc", "Invalid character in decimal number"); + + // Hex count overflow. + parse_err::("0x0_0000_0000_0000_0000", "Too many hexadecimal digits"); + } + + #[test] + fn format_offset32() { + assert_eq!(Offset32(0).to_string(), ""); + assert_eq!(Offset32(1).to_string(), "+1"); + assert_eq!(Offset32(-1).to_string(), "-1"); + assert_eq!(Offset32(9999).to_string(), "+9999"); + assert_eq!(Offset32(10000).to_string(), "+0x2710"); + assert_eq!(Offset32(-9999).to_string(), "-9999"); + assert_eq!(Offset32(-10000).to_string(), "-0x2710"); + assert_eq!(Offset32(0xffff).to_string(), "+0xffff"); + assert_eq!(Offset32(0x10000).to_string(), "+0x0001_0000"); + } + + #[test] + fn parse_offset32() { + parse_ok::("+0", ""); + parse_ok::("+1", "+1"); + parse_ok::("-0", ""); + parse_ok::("-1", "-1"); + parse_ok::("+0x0", ""); + parse_ok::("+0xf", "+15"); + parse_ok::("-0x9", "-9"); + parse_ok::("-0x8000_0000", "-0x8000_0000"); + + parse_err::("+0x8000_0000", "Offset out of range"); + } + + #[test] + fn format_ieee16() { + assert_eq!(Ieee16::with_bits(0).to_string(), "0.0"); // 0.0 + assert_eq!(Ieee16::with_bits(0x8000).to_string(), "-0.0"); // -0.0 + assert_eq!(Ieee16::with_bits(0x3c00).to_string(), "0x1.000p0"); // 1.0 + assert_eq!(Ieee16::with_bits(0x3e00).to_string(), "0x1.800p0"); // 1.5 + assert_eq!(Ieee16::with_bits(0x3800).to_string(), "0x1.000p-1"); // 0.5 + assert_eq!( + Ieee16::with_bits(0x1400).to_string(), // `f16::EPSILON` + "0x1.000p-10" + ); + assert_eq!( + Ieee16::with_bits(0xfbff).to_string(), // `f16::MIN` + "-0x1.ffcp15" + ); + assert_eq!( + Ieee16::with_bits(0x7bff).to_string(), // `f16::MAX` + "0x1.ffcp15" + ); + // Smallest positive normal number. + assert_eq!( + Ieee16::with_bits(0x0400).to_string(), // `f16::MIN_POSITIVE` + "0x1.000p-14" + ); + // Subnormals. + assert_eq!( + Ieee16::with_bits(0x0200).to_string(), // `f16::MIN_POSITIVE / 2.0` + "0x0.800p-14" + ); + assert_eq!( + Ieee16::with_bits(0x0001).to_string(), // `f16::MIN_POSITIVE * f16::EPSILON` + "0x0.004p-14" + ); + assert_eq!( + Ieee16::with_bits(0x7c00).to_string(), // `f16::INFINITY` + "+Inf" + ); + assert_eq!( + Ieee16::with_bits(0xfc00).to_string(), // `f16::NEG_INFINITY` + "-Inf" + ); + assert_eq!( + Ieee16::with_bits(0x7e00).to_string(), // `f16::NAN` + "+NaN" + ); + assert_eq!( + Ieee16::with_bits(0xfe00).to_string(), // `-f16::NAN` + "-NaN" + ); + // Construct some qNaNs with payloads. + assert_eq!(Ieee16::with_bits(0x7e01).to_string(), "+NaN:0x1"); + assert_eq!(Ieee16::with_bits(0x7f01).to_string(), "+NaN:0x101"); + // Signaling NaNs. 
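+        // For Ieee16 the quiet bit is 1 << 9 = 0x0200: an all-ones exponent (0x7c00) with
+        // that bit clear and a non-zero payload is printed as a signaling NaN below.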
+ assert_eq!(Ieee16::with_bits(0x7c01).to_string(), "+sNaN:0x1"); + assert_eq!(Ieee16::with_bits(0x7d01).to_string(), "+sNaN:0x101"); + } + + #[test] + fn parse_ieee16() { + parse_ok::("0.0", "0.0"); + parse_ok::("+0.0", "0.0"); + parse_ok::("-0.0", "-0.0"); + parse_ok::("0x0", "0.0"); + parse_ok::("0x0.0", "0.0"); + parse_ok::("0x.0", "0.0"); + parse_ok::("0x0.", "0.0"); + parse_ok::("0x1", "0x1.000p0"); + parse_ok::("+0x1", "0x1.000p0"); + parse_ok::("-0x1", "-0x1.000p0"); + parse_ok::("0x10", "0x1.000p4"); + parse_ok::("0x10.0", "0x1.000p4"); + parse_err::("0.", "Float must be hexadecimal"); + parse_err::(".0", "Float must be hexadecimal"); + parse_err::("0", "Float must be hexadecimal"); + parse_err::("-0", "Float must be hexadecimal"); + parse_err::(".", "Float must be hexadecimal"); + parse_err::("", "Float must be hexadecimal"); + parse_err::("-", "Float must be hexadecimal"); + parse_err::("0x", "No digits"); + parse_err::("0x..", "Multiple radix points"); + + // Check significant bits. + parse_ok::("0x0.ffe", "0x1.ffcp-1"); + parse_ok::("0x1.ffc", "0x1.ffcp0"); + parse_ok::("0x3.ff8", "0x1.ffcp1"); + parse_ok::("0x7.ff", "0x1.ffcp2"); + parse_ok::("0xf.fe", "0x1.ffcp3"); + parse_err::("0x1.ffe", "Too many significant bits"); + parse_err::("0x1.ffc00000000000000000000000000000", "Too many digits"); + + // Exponents. + parse_ok::("0x1p3", "0x1.000p3"); + parse_ok::("0x1p-3", "0x1.000p-3"); + parse_ok::("0x1.0p3", "0x1.000p3"); + parse_ok::("0x2.0p3", "0x1.000p4"); + parse_ok::("0x1.0p15", "0x1.000p15"); + parse_ok::("0x1.0p-14", "0x1.000p-14"); + parse_ok::("0x0.1p-10", "0x1.000p-14"); + parse_err::("0x2.0p15", "Magnitude too large"); + + // Subnormals. + parse_ok::("0x1.0p-15", "0x0.800p-14"); + parse_ok::("0x1.0p-24", "0x0.004p-14"); + parse_ok::("0x0.004p-14", "0x0.004p-14"); + parse_err::("0x0.102p-14", "Subnormal underflow"); + parse_err::("0x1.8p-24", "Subnormal underflow"); + parse_err::("0x1.0p-25", "Magnitude too small"); + + // NaNs and Infs. + parse_ok::("Inf", "+Inf"); + parse_ok::("+Inf", "+Inf"); + parse_ok::("-Inf", "-Inf"); + parse_ok::("NaN", "+NaN"); + parse_ok::("+NaN", "+NaN"); + parse_ok::("-NaN", "-NaN"); + parse_ok::("NaN:0x0", "+NaN"); + parse_err::("NaN:", "Float must be hexadecimal"); + parse_err::("NaN:0", "Float must be hexadecimal"); + parse_err::("NaN:0x", "Invalid NaN payload"); + parse_ok::("NaN:0x001", "+NaN:0x1"); + parse_ok::("NaN:0x101", "+NaN:0x101"); + parse_err::("NaN:0x301", "Invalid NaN payload"); + parse_ok::("sNaN:0x1", "+sNaN:0x1"); + parse_err::("sNaN:0x0", "Invalid sNaN payload"); + parse_ok::("sNaN:0x101", "+sNaN:0x101"); + parse_err::("sNaN:0x301", "Invalid sNaN payload"); + } + + #[test] + fn pow2_ieee16() { + assert_eq!(Ieee16::pow2(0).to_string(), "0x1.000p0"); + assert_eq!(Ieee16::pow2(1).to_string(), "0x1.000p1"); + assert_eq!(Ieee16::pow2(-1).to_string(), "0x1.000p-1"); + assert_eq!(Ieee16::pow2(15).to_string(), "0x1.000p15"); + assert_eq!(Ieee16::pow2(-14).to_string(), "0x1.000p-14"); + + assert_eq!((-Ieee16::pow2(1)).to_string(), "-0x1.000p1"); + } + + #[test] + fn fcvt_to_sint_negative_overflow_ieee16() { + // FIXME(#8312): Replace with commented out version once Rust f16 support is stabilised. 
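+        // Sanity check of the expected string: for n = 8 the constant is -(2^7) - 1 = -129,
+        // and 0x1.020p7 = (1 + 0x020 / 0x1000) * 2^7 = 129, so "-0x1.020p7" is exactly -129.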
+ // let n = 8; + // assert_eq!( + // -((1u16 << (n - 1)) as f16) - 1.0, + // Ieee16::fcvt_to_sint_negative_overflow(n).as_f16() + // ); + let n = 8; + assert_eq!( + "-0x1.020p7", + Ieee16::fcvt_to_sint_negative_overflow(n).to_string() + ); + } + + #[test] + fn format_ieee32() { + assert_eq!(Ieee32::with_float(0.0).to_string(), "0.0"); + assert_eq!(Ieee32::with_float(-0.0).to_string(), "-0.0"); + assert_eq!(Ieee32::with_float(1.0).to_string(), "0x1.000000p0"); + assert_eq!(Ieee32::with_float(1.5).to_string(), "0x1.800000p0"); + assert_eq!(Ieee32::with_float(0.5).to_string(), "0x1.000000p-1"); + assert_eq!( + Ieee32::with_float(f32::EPSILON).to_string(), + "0x1.000000p-23" + ); + assert_eq!(Ieee32::with_float(f32::MIN).to_string(), "-0x1.fffffep127"); + assert_eq!(Ieee32::with_float(f32::MAX).to_string(), "0x1.fffffep127"); + // Smallest positive normal number. + assert_eq!( + Ieee32::with_float(f32::MIN_POSITIVE).to_string(), + "0x1.000000p-126" + ); + // Subnormals. + assert_eq!( + Ieee32::with_float(f32::MIN_POSITIVE / 2.0).to_string(), + "0x0.800000p-126" + ); + assert_eq!( + Ieee32::with_float(f32::MIN_POSITIVE * f32::EPSILON).to_string(), + "0x0.000002p-126" + ); + assert_eq!(Ieee32::with_float(f32::INFINITY).to_string(), "+Inf"); + assert_eq!(Ieee32::with_float(f32::NEG_INFINITY).to_string(), "-Inf"); + assert_eq!(Ieee32::with_float(f32::NAN).to_string(), "+NaN"); + assert_eq!(Ieee32::with_float(-f32::NAN).to_string(), "-NaN"); + // Construct some qNaNs with payloads. + assert_eq!(Ieee32::with_bits(0x7fc00001).to_string(), "+NaN:0x1"); + assert_eq!(Ieee32::with_bits(0x7ff00001).to_string(), "+NaN:0x300001"); + // Signaling NaNs. + assert_eq!(Ieee32::with_bits(0x7f800001).to_string(), "+sNaN:0x1"); + assert_eq!(Ieee32::with_bits(0x7fa00001).to_string(), "+sNaN:0x200001"); + } + + #[test] + fn parse_ieee32() { + parse_ok::("0.0", "0.0"); + parse_ok::("+0.0", "0.0"); + parse_ok::("-0.0", "-0.0"); + parse_ok::("0x0", "0.0"); + parse_ok::("0x0.0", "0.0"); + parse_ok::("0x.0", "0.0"); + parse_ok::("0x0.", "0.0"); + parse_ok::("0x1", "0x1.000000p0"); + parse_ok::("+0x1", "0x1.000000p0"); + parse_ok::("-0x1", "-0x1.000000p0"); + parse_ok::("0x10", "0x1.000000p4"); + parse_ok::("0x10.0", "0x1.000000p4"); + parse_err::("0.", "Float must be hexadecimal"); + parse_err::(".0", "Float must be hexadecimal"); + parse_err::("0", "Float must be hexadecimal"); + parse_err::("-0", "Float must be hexadecimal"); + parse_err::(".", "Float must be hexadecimal"); + parse_err::("", "Float must be hexadecimal"); + parse_err::("-", "Float must be hexadecimal"); + parse_err::("0x", "No digits"); + parse_err::("0x..", "Multiple radix points"); + + // Check significant bits. + parse_ok::("0x0.ffffff", "0x1.fffffep-1"); + parse_ok::("0x1.fffffe", "0x1.fffffep0"); + parse_ok::("0x3.fffffc", "0x1.fffffep1"); + parse_ok::("0x7.fffff8", "0x1.fffffep2"); + parse_ok::("0xf.fffff0", "0x1.fffffep3"); + parse_err::("0x1.ffffff", "Too many significant bits"); + parse_err::("0x1.fffffe00000000000000000000000000", "Too many digits"); + + // Exponents. + parse_ok::("0x1p3", "0x1.000000p3"); + parse_ok::("0x1p-3", "0x1.000000p-3"); + parse_ok::("0x1.0p3", "0x1.000000p3"); + parse_ok::("0x2.0p3", "0x1.000000p4"); + parse_ok::("0x1.0p127", "0x1.000000p127"); + parse_ok::("0x1.0p-126", "0x1.000000p-126"); + parse_ok::("0x0.1p-122", "0x1.000000p-126"); + parse_err::("0x2.0p127", "Magnitude too large"); + + // Subnormals. 
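+        // Subnormals have a zero exponent field: 0x1.0p-149 is the smallest positive f32
+        // and round-trips below as 0x0.000002p-126 (significand bit 1, exponent field 0).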
+ parse_ok::("0x1.0p-127", "0x0.800000p-126"); + parse_ok::("0x1.0p-149", "0x0.000002p-126"); + parse_ok::("0x0.000002p-126", "0x0.000002p-126"); + parse_err::("0x0.100001p-126", "Subnormal underflow"); + parse_err::("0x1.8p-149", "Subnormal underflow"); + parse_err::("0x1.0p-150", "Magnitude too small"); + + // NaNs and Infs. + parse_ok::("Inf", "+Inf"); + parse_ok::("+Inf", "+Inf"); + parse_ok::("-Inf", "-Inf"); + parse_ok::("NaN", "+NaN"); + parse_ok::("+NaN", "+NaN"); + parse_ok::("-NaN", "-NaN"); + parse_ok::("NaN:0x0", "+NaN"); + parse_err::("NaN:", "Float must be hexadecimal"); + parse_err::("NaN:0", "Float must be hexadecimal"); + parse_err::("NaN:0x", "Invalid NaN payload"); + parse_ok::("NaN:0x000001", "+NaN:0x1"); + parse_ok::("NaN:0x300001", "+NaN:0x300001"); + parse_err::("NaN:0x400001", "Invalid NaN payload"); + parse_ok::("sNaN:0x1", "+sNaN:0x1"); + parse_err::("sNaN:0x0", "Invalid sNaN payload"); + parse_ok::("sNaN:0x200001", "+sNaN:0x200001"); + parse_err::("sNaN:0x400001", "Invalid sNaN payload"); + } + + #[test] + fn pow2_ieee32() { + assert_eq!(Ieee32::pow2(0).to_string(), "0x1.000000p0"); + assert_eq!(Ieee32::pow2(1).to_string(), "0x1.000000p1"); + assert_eq!(Ieee32::pow2(-1).to_string(), "0x1.000000p-1"); + assert_eq!(Ieee32::pow2(127).to_string(), "0x1.000000p127"); + assert_eq!(Ieee32::pow2(-126).to_string(), "0x1.000000p-126"); + + assert_eq!((-Ieee32::pow2(1)).to_string(), "-0x1.000000p1"); + } + + #[test] + fn fcvt_to_sint_negative_overflow_ieee32() { + for n in [8, 16] { + assert_eq!( + -((1u32 << (n - 1)) as f32) - 1.0, + Ieee32::fcvt_to_sint_negative_overflow(n).as_f32(), + "n = {n}" + ); + } + } + + #[test] + fn format_ieee64() { + assert_eq!(Ieee64::with_float(0.0).to_string(), "0.0"); + assert_eq!(Ieee64::with_float(-0.0).to_string(), "-0.0"); + assert_eq!(Ieee64::with_float(1.0).to_string(), "0x1.0000000000000p0"); + assert_eq!(Ieee64::with_float(1.5).to_string(), "0x1.8000000000000p0"); + assert_eq!(Ieee64::with_float(0.5).to_string(), "0x1.0000000000000p-1"); + assert_eq!( + Ieee64::with_float(f64::EPSILON).to_string(), + "0x1.0000000000000p-52" + ); + assert_eq!( + Ieee64::with_float(f64::MIN).to_string(), + "-0x1.fffffffffffffp1023" + ); + assert_eq!( + Ieee64::with_float(f64::MAX).to_string(), + "0x1.fffffffffffffp1023" + ); + // Smallest positive normal number. + assert_eq!( + Ieee64::with_float(f64::MIN_POSITIVE).to_string(), + "0x1.0000000000000p-1022" + ); + // Subnormals. + assert_eq!( + Ieee64::with_float(f64::MIN_POSITIVE / 2.0).to_string(), + "0x0.8000000000000p-1022" + ); + assert_eq!( + Ieee64::with_float(f64::MIN_POSITIVE * f64::EPSILON).to_string(), + "0x0.0000000000001p-1022" + ); + assert_eq!(Ieee64::with_float(f64::INFINITY).to_string(), "+Inf"); + assert_eq!(Ieee64::with_float(f64::NEG_INFINITY).to_string(), "-Inf"); + assert_eq!(Ieee64::with_float(f64::NAN).to_string(), "+NaN"); + assert_eq!(Ieee64::with_float(-f64::NAN).to_string(), "-NaN"); + // Construct some qNaNs with payloads. + assert_eq!( + Ieee64::with_bits(0x7ff8000000000001).to_string(), + "+NaN:0x1" + ); + assert_eq!( + Ieee64::with_bits(0x7ffc000000000001).to_string(), + "+NaN:0x4000000000001" + ); + // Signaling NaNs. 
+ assert_eq!( + Ieee64::with_bits(0x7ff0000000000001).to_string(), + "+sNaN:0x1" + ); + assert_eq!( + Ieee64::with_bits(0x7ff4000000000001).to_string(), + "+sNaN:0x4000000000001" + ); + } + + #[test] + fn parse_ieee64() { + parse_ok::("0.0", "0.0"); + parse_ok::("-0.0", "-0.0"); + parse_ok::("0x0", "0.0"); + parse_ok::("0x0.0", "0.0"); + parse_ok::("0x.0", "0.0"); + parse_ok::("0x0.", "0.0"); + parse_ok::("0x1", "0x1.0000000000000p0"); + parse_ok::("-0x1", "-0x1.0000000000000p0"); + parse_ok::("0x10", "0x1.0000000000000p4"); + parse_ok::("0x10.0", "0x1.0000000000000p4"); + parse_err::("0.", "Float must be hexadecimal"); + parse_err::(".0", "Float must be hexadecimal"); + parse_err::("0", "Float must be hexadecimal"); + parse_err::("-0", "Float must be hexadecimal"); + parse_err::(".", "Float must be hexadecimal"); + parse_err::("", "Float must be hexadecimal"); + parse_err::("-", "Float must be hexadecimal"); + parse_err::("0x", "No digits"); + parse_err::("0x..", "Multiple radix points"); + + // Check significant bits. + parse_ok::("0x0.fffffffffffff8", "0x1.fffffffffffffp-1"); + parse_ok::("0x1.fffffffffffff", "0x1.fffffffffffffp0"); + parse_ok::("0x3.ffffffffffffe", "0x1.fffffffffffffp1"); + parse_ok::("0x7.ffffffffffffc", "0x1.fffffffffffffp2"); + parse_ok::("0xf.ffffffffffff8", "0x1.fffffffffffffp3"); + parse_err::("0x3.fffffffffffff", "Too many significant bits"); + parse_err::("0x001.fffffe000000000000000000000000", "Too many digits"); + + // Exponents. + parse_ok::("0x1p3", "0x1.0000000000000p3"); + parse_ok::("0x1p-3", "0x1.0000000000000p-3"); + parse_ok::("0x1.0p3", "0x1.0000000000000p3"); + parse_ok::("0x2.0p3", "0x1.0000000000000p4"); + parse_ok::("0x1.0p1023", "0x1.0000000000000p1023"); + parse_ok::("0x1.0p-1022", "0x1.0000000000000p-1022"); + parse_ok::("0x0.1p-1018", "0x1.0000000000000p-1022"); + parse_err::("0x2.0p1023", "Magnitude too large"); + + // Subnormals. + parse_ok::("0x1.0p-1023", "0x0.8000000000000p-1022"); + parse_ok::("0x1.0p-1074", "0x0.0000000000001p-1022"); + parse_ok::("0x0.0000000000001p-1022", "0x0.0000000000001p-1022"); + parse_err::("0x0.10000000000008p-1022", "Subnormal underflow"); + parse_err::("0x1.8p-1074", "Subnormal underflow"); + parse_err::("0x1.0p-1075", "Magnitude too small"); + + // NaNs and Infs. 
+ parse_ok::("Inf", "+Inf"); + parse_ok::("-Inf", "-Inf"); + parse_ok::("NaN", "+NaN"); + parse_ok::("-NaN", "-NaN"); + parse_ok::("NaN:0x0", "+NaN"); + parse_err::("NaN:", "Float must be hexadecimal"); + parse_err::("NaN:0", "Float must be hexadecimal"); + parse_err::("NaN:0x", "Invalid NaN payload"); + parse_ok::("NaN:0x000001", "+NaN:0x1"); + parse_ok::("NaN:0x4000000000001", "+NaN:0x4000000000001"); + parse_err::("NaN:0x8000000000001", "Invalid NaN payload"); + parse_ok::("sNaN:0x1", "+sNaN:0x1"); + parse_err::("sNaN:0x0", "Invalid sNaN payload"); + parse_ok::("sNaN:0x4000000000001", "+sNaN:0x4000000000001"); + parse_err::("sNaN:0x8000000000001", "Invalid sNaN payload"); + } + + #[test] + fn pow2_ieee64() { + assert_eq!(Ieee64::pow2(0).to_string(), "0x1.0000000000000p0"); + assert_eq!(Ieee64::pow2(1).to_string(), "0x1.0000000000000p1"); + assert_eq!(Ieee64::pow2(-1).to_string(), "0x1.0000000000000p-1"); + assert_eq!(Ieee64::pow2(1023).to_string(), "0x1.0000000000000p1023"); + assert_eq!(Ieee64::pow2(-1022).to_string(), "0x1.0000000000000p-1022"); + + assert_eq!((-Ieee64::pow2(1)).to_string(), "-0x1.0000000000000p1"); + } + + #[test] + fn fcvt_to_sint_negative_overflow_ieee64() { + for n in [8, 16, 32] { + assert_eq!( + -((1u64 << (n - 1)) as f64) - 1.0, + Ieee64::fcvt_to_sint_negative_overflow(n).as_f64(), + "n = {n}" + ); + } + } + + #[test] + fn format_ieee128() { + assert_eq!( + Ieee128::with_bits(0x00000000000000000000000000000000).to_string(), // 0.0 + "0.0" + ); + assert_eq!( + Ieee128::with_bits(0x80000000000000000000000000000000).to_string(), // -0.0 + "-0.0" + ); + assert_eq!( + Ieee128::with_bits(0x3fff0000000000000000000000000000).to_string(), // 1.0 + "0x1.0000000000000000000000000000p0" + ); + assert_eq!( + Ieee128::with_bits(0x3fff8000000000000000000000000000).to_string(), // 1.5 + "0x1.8000000000000000000000000000p0" + ); + assert_eq!( + Ieee128::with_bits(0x3ffe0000000000000000000000000000).to_string(), // 0.5 + "0x1.0000000000000000000000000000p-1" + ); + assert_eq!( + Ieee128::with_bits(0x3f8f0000000000000000000000000000).to_string(), // `f128::EPSILON` + "0x1.0000000000000000000000000000p-112" + ); + assert_eq!( + Ieee128::with_bits(0xfffeffffffffffffffffffffffffffff).to_string(), // `f128::MIN` + "-0x1.ffffffffffffffffffffffffffffp16383" + ); + assert_eq!( + Ieee128::with_bits(0x7ffeffffffffffffffffffffffffffff).to_string(), // `f128::MAX` + "0x1.ffffffffffffffffffffffffffffp16383" + ); + // Smallest positive normal number. + assert_eq!( + Ieee128::with_bits(0x00010000000000000000000000000000).to_string(), // `f128::MIN_POSITIVE` + "0x1.0000000000000000000000000000p-16382" + ); + // Subnormals. + assert_eq!( + Ieee128::with_bits(0x00008000000000000000000000000000).to_string(), // `f128::MIN_POSITIVE / 2.0` + "0x0.8000000000000000000000000000p-16382" + ); + assert_eq!( + Ieee128::with_bits(0x00000000000000000000000000000001).to_string(), // `f128::MIN_POSITIVE * f128::EPSILON` + "0x0.0000000000000000000000000001p-16382" + ); + assert_eq!( + Ieee128::with_bits(0x7fff0000000000000000000000000000).to_string(), // `f128::INFINITY` + "+Inf" + ); + assert_eq!( + Ieee128::with_bits(0xffff0000000000000000000000000000).to_string(), // `f128::NEG_INFINITY` + "-Inf" + ); + assert_eq!( + Ieee128::with_bits(0x7fff8000000000000000000000000000).to_string(), // `f128::NAN` + "+NaN" + ); + assert_eq!( + Ieee128::with_bits(0xffff8000000000000000000000000000).to_string(), // `-f128::NAN` + "-NaN" + ); + // Construct some qNaNs with payloads. 
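+        // For Ieee128 (t = 112) the quiet bit is 1 << 111, the leading 0x8 nibble of the
+        // significand in 0x7fff8000...0001; the bits below it form the payload after ':'.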
+ assert_eq!( + Ieee128::with_bits(0x7fff8000000000000000000000000001).to_string(), + "+NaN:0x1" + ); + assert_eq!( + Ieee128::with_bits(0x7fffc000000000000000000000000001).to_string(), + "+NaN:0x4000000000000000000000000001" + ); + // Signaling NaNs. + assert_eq!( + Ieee128::with_bits(0x7fff0000000000000000000000000001).to_string(), + "+sNaN:0x1" + ); + assert_eq!( + Ieee128::with_bits(0x7fff4000000000000000000000000001).to_string(), + "+sNaN:0x4000000000000000000000000001" + ); + } + + #[test] + fn parse_ieee128() { + parse_ok::("0.0", "0.0"); + parse_ok::("-0.0", "-0.0"); + parse_ok::("0x0", "0.0"); + parse_ok::("0x0.0", "0.0"); + parse_ok::("0x.0", "0.0"); + parse_ok::("0x0.", "0.0"); + parse_ok::("0x1", "0x1.0000000000000000000000000000p0"); + parse_ok::("-0x1", "-0x1.0000000000000000000000000000p0"); + parse_ok::("0x10", "0x1.0000000000000000000000000000p4"); + parse_ok::("0x10.0", "0x1.0000000000000000000000000000p4"); + parse_err::("0.", "Float must be hexadecimal"); + parse_err::(".0", "Float must be hexadecimal"); + parse_err::("0", "Float must be hexadecimal"); + parse_err::("-0", "Float must be hexadecimal"); + parse_err::(".", "Float must be hexadecimal"); + parse_err::("", "Float must be hexadecimal"); + parse_err::("-", "Float must be hexadecimal"); + parse_err::("0x", "No digits"); + parse_err::("0x..", "Multiple radix points"); + + // Check significant bits. + parse_ok::( + "0x0.ffffffffffffffffffffffffffff8", + "0x1.ffffffffffffffffffffffffffffp-1", + ); + parse_ok::( + "0x1.ffffffffffffffffffffffffffff", + "0x1.ffffffffffffffffffffffffffffp0", + ); + parse_ok::( + "0x3.fffffffffffffffffffffffffffe", + "0x1.ffffffffffffffffffffffffffffp1", + ); + parse_ok::( + "0x7.fffffffffffffffffffffffffffc", + "0x1.ffffffffffffffffffffffffffffp2", + ); + parse_ok::( + "0xf.fffffffffffffffffffffffffff8", + "0x1.ffffffffffffffffffffffffffffp3", + ); + parse_err::( + "0x3.ffffffffffffffffffffffffffff", + "Too many significant bits", + ); + parse_err::("0x001.fffffe000000000000000000000000", "Too many digits"); + + // Exponents. + parse_ok::("0x1p3", "0x1.0000000000000000000000000000p3"); + parse_ok::("0x1p-3", "0x1.0000000000000000000000000000p-3"); + parse_ok::("0x1.0p3", "0x1.0000000000000000000000000000p3"); + parse_ok::("0x2.0p3", "0x1.0000000000000000000000000000p4"); + parse_ok::("0x1.0p16383", "0x1.0000000000000000000000000000p16383"); + parse_ok::("0x1.0p-16382", "0x1.0000000000000000000000000000p-16382"); + parse_ok::("0x0.1p-16378", "0x1.0000000000000000000000000000p-16382"); + parse_err::("0x2.0p16383", "Magnitude too large"); + + // Subnormals. + parse_ok::("0x1.0p-16383", "0x0.8000000000000000000000000000p-16382"); + parse_ok::("0x1.0p-16494", "0x0.0000000000000000000000000001p-16382"); + parse_ok::( + "0x0.0000000000000000000000000001p-16382", + "0x0.0000000000000000000000000001p-16382", + ); + parse_err::( + "0x0.10000000000000000000000000008p-16382", + "Subnormal underflow", + ); + parse_err::("0x1.8p-16494", "Subnormal underflow"); + parse_err::("0x1.0p-16495", "Magnitude too small"); + + // NaNs and Infs. 
+ parse_ok::("Inf", "+Inf"); + parse_ok::("-Inf", "-Inf"); + parse_ok::("NaN", "+NaN"); + parse_ok::("-NaN", "-NaN"); + parse_ok::("NaN:0x0", "+NaN"); + parse_err::("NaN:", "Float must be hexadecimal"); + parse_err::("NaN:0", "Float must be hexadecimal"); + parse_err::("NaN:0x", "Invalid NaN payload"); + parse_ok::("NaN:0x000001", "+NaN:0x1"); + parse_ok::( + "NaN:0x4000000000000000000000000001", + "+NaN:0x4000000000000000000000000001", + ); + parse_err::("NaN:0x8000000000000000000000000001", "Invalid NaN payload"); + parse_ok::("sNaN:0x1", "+sNaN:0x1"); + parse_err::("sNaN:0x0", "Invalid sNaN payload"); + parse_ok::( + "sNaN:0x4000000000000000000000000001", + "+sNaN:0x4000000000000000000000000001", + ); + parse_err::( + "sNaN:0x8000000000000000000000000001", + "Invalid sNaN payload", + ); + } + + #[test] + fn pow2_ieee128() { + assert_eq!( + Ieee128::pow2(0).to_string(), + "0x1.0000000000000000000000000000p0" + ); + assert_eq!( + Ieee128::pow2(1).to_string(), + "0x1.0000000000000000000000000000p1" + ); + assert_eq!( + Ieee128::pow2(-1).to_string(), + "0x1.0000000000000000000000000000p-1" + ); + assert_eq!( + Ieee128::pow2(16383).to_string(), + "0x1.0000000000000000000000000000p16383" + ); + assert_eq!( + Ieee128::pow2(-16382).to_string(), + "0x1.0000000000000000000000000000p-16382" + ); + + assert_eq!( + (-Ieee128::pow2(1)).to_string(), + "-0x1.0000000000000000000000000000p1" + ); + } + + #[test] + fn fcvt_to_sint_negative_overflow_ieee128() { + // FIXME(#8312): Replace with commented out version once Rust f128 support is stabilised. + // for n in [8, 16, 32, 64] { + // assert_eq!( + // -((1u128 << (n - 1)) as f128) - 1.0, + // Ieee128::fcvt_to_sint_negative_overflow(n).as_f128(), + // "n = {n}" + // ); + // } + for (n, expected) in [ + (8, "-0x1.0200000000000000000000000000p7"), + (16, "-0x1.0002000000000000000000000000p15"), + (32, "-0x1.0000000200000000000000000000p31"), + (64, "-0x1.0000000000000002000000000000p63"), + ] { + assert_eq!( + expected, + Ieee128::fcvt_to_sint_negative_overflow(n).to_string(), + "n = {n}" + ); + } + } +} diff --git a/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/instructions.rs b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/instructions.rs new file mode 100644 index 000000000..8aa8d7cb0 --- /dev/null +++ b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/instructions.rs @@ -0,0 +1,1018 @@ +//! Instruction formats and opcodes. +//! +//! The `instructions` module contains definitions for instruction formats, opcodes, and the +//! in-memory representation of IR instructions. +//! +//! A large part of this module is auto-generated from the instruction descriptions in the meta +//! directory. + +use crate::constant_hash::Table; +use alloc::vec::Vec; +use core::fmt::{self, Display, Formatter}; +use core::ops::{Deref, DerefMut}; +use core::str::FromStr; + +#[cfg(feature = "enable-serde")] +use serde_derive::{Deserialize, Serialize}; + +use crate::bitset::ScalarBitSet; +use crate::entity; +use crate::ir::{ + self, + condcodes::{FloatCC, IntCC}, + trapcode::TrapCode, + types, Block, FuncRef, MemFlags, SigRef, StackSlot, Type, Value, +}; + +/// Some instructions use an external list of argument values because there is not enough space in +/// the 16-byte `InstructionData` struct. These value lists are stored in a memory pool in +/// `dfg.value_lists`. +pub type ValueList = entity::EntityList; + +/// Memory pool for holding value lists. See `ValueList`. 
+pub type ValueListPool = entity::ListPool; + +/// A pair of a Block and its arguments, stored in a single EntityList internally. +/// +/// NOTE: We don't expose either value_to_block or block_to_value outside of this module because +/// this operation is not generally safe. However, as the two share the same underlying layout, +/// they can be stored in the same value pool. +/// +/// BlockCall makes use of this shared layout by storing all of its contents (a block and its +/// argument) in a single EntityList. This is a bit better than introducing a new entity type for +/// the pair of a block name and the arguments entity list, as we don't pay any indirection penalty +/// to get to the argument values -- they're stored in-line with the block in the same list. +/// +/// The BlockCall::new function guarantees this layout by requiring a block argument that's written +/// in as the first element of the EntityList. Any subsequent entries are always assumed to be real +/// Values. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub struct BlockCall { + /// The underlying storage for the BlockCall. The first element of the values EntityList is + /// guaranteed to always be a Block encoded as a Value via BlockCall::block_to_value. + /// Consequently, the values entity list is never empty. + values: entity::EntityList, +} + +impl BlockCall { + // NOTE: the only uses of this function should be internal to BlockCall. See the block comment + // on BlockCall for more context. + fn value_to_block(val: Value) -> Block { + Block::from_u32(val.as_u32()) + } + + // NOTE: the only uses of this function should be internal to BlockCall. See the block comment + // on BlockCall for more context. + fn block_to_value(block: Block) -> Value { + Value::from_u32(block.as_u32()) + } + + /// Construct a BlockCall with the given block and arguments. + pub fn new(block: Block, args: &[Value], pool: &mut ValueListPool) -> Self { + let mut values = ValueList::default(); + values.push(Self::block_to_value(block), pool); + values.extend(args.iter().copied(), pool); + Self { values } + } + + /// Return the block for this BlockCall. + pub fn block(&self, pool: &ValueListPool) -> Block { + let val = self.values.first(pool).unwrap(); + Self::value_to_block(val) + } + + /// Replace the block for this BlockCall. + pub fn set_block(&mut self, block: Block, pool: &mut ValueListPool) { + *self.values.get_mut(0, pool).unwrap() = Self::block_to_value(block); + } + + /// Append an argument to the block args. + pub fn append_argument(&mut self, arg: Value, pool: &mut ValueListPool) { + self.values.push(arg, pool); + } + + /// Return a slice for the arguments of this block. + pub fn args_slice<'a>(&self, pool: &'a ValueListPool) -> &'a [Value] { + &self.values.as_slice(pool)[1..] + } + + /// Return a slice for the arguments of this block. + pub fn args_slice_mut<'a>(&'a mut self, pool: &'a mut ValueListPool) -> &'a mut [Value] { + &mut self.values.as_mut_slice(pool)[1..] + } + + /// Remove the argument at ix from the argument list. + pub fn remove(&mut self, ix: usize, pool: &mut ValueListPool) { + self.values.remove(1 + ix, pool) + } + + /// Clear out the arguments list. + pub fn clear(&mut self, pool: &mut ValueListPool) { + self.values.truncate(1, pool) + } + + /// Appends multiple elements to the arguments. 
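+    // Illustrative use of the shared-list layout described above: a call edge built with
+    // `BlockCall::new(block, &[x, y], &mut pool)` stores `block` in slot 0 and the arguments
+    // after it, so `args_slice(&pool)` yields `[x, y]`, and `extend([z], &mut pool)` (below)
+    // simply appends to the same EntityList.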
+ pub fn extend(&mut self, elements: I, pool: &mut ValueListPool) + where + I: IntoIterator, + { + self.values.extend(elements, pool) + } + + /// Return a value that can display this block call. + pub fn display<'a>(&self, pool: &'a ValueListPool) -> DisplayBlockCall<'a> { + DisplayBlockCall { block: *self, pool } + } + + /// Deep-clone the underlying list in the same pool. The returned + /// list will have identical contents but changes to this list + /// will not change its contents or vice-versa. + pub fn deep_clone(&self, pool: &mut ValueListPool) -> Self { + Self { + values: self.values.deep_clone(pool), + } + } +} + +/// Wrapper for the context needed to display a [BlockCall] value. +pub struct DisplayBlockCall<'a> { + block: BlockCall, + pool: &'a ValueListPool, +} + +impl<'a> Display for DisplayBlockCall<'a> { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.block.block(&self.pool))?; + let args = self.block.args_slice(&self.pool); + if !args.is_empty() { + write!(f, "(")?; + for (ix, arg) in args.iter().enumerate() { + if ix > 0 { + write!(f, ", ")?; + } + write!(f, "{arg}")?; + } + write!(f, ")")?; + } + Ok(()) + } +} + +// Include code generated by `cranelift-codegen/meta/src/gen_inst.rs`. This file contains: +// +// - The `pub enum InstructionFormat` enum with all the instruction formats. +// - The `pub enum InstructionData` enum with all the instruction data fields. +// - The `pub enum Opcode` definition with all known opcodes, +// - The `const OPCODE_FORMAT: [InstructionFormat; N]` table. +// - The private `fn opcode_name(Opcode) -> &'static str` function, and +// - The hash table `const OPCODE_HASH_TABLE: [Opcode; N]`. +// +// For value type constraints: +// +// - The `const OPCODE_CONSTRAINTS : [OpcodeConstraints; N]` table. +// - The `const TYPE_SETS : [ValueTypeSet; N]` table. +// - The `const OPERAND_CONSTRAINTS : [OperandConstraint; N]` table. +// +include!(concat!(env!("OUT_DIR"), "/opcodes.rs")); + +impl Display for Opcode { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "{}", opcode_name(*self)) + } +} + +impl Opcode { + /// Get the instruction format for this opcode. + pub fn format(self) -> InstructionFormat { + OPCODE_FORMAT[self as usize - 1] + } + + /// Get the constraint descriptor for this opcode. + /// Panic if this is called on `NotAnOpcode`. + pub fn constraints(self) -> OpcodeConstraints { + OPCODE_CONSTRAINTS[self as usize - 1] + } + + /// Is this instruction a GC safepoint? + /// + /// Safepoints are all kinds of calls, except for tail calls. + #[inline] + pub fn is_safepoint(self) -> bool { + self.is_call() && !self.is_return() + } +} + +// This trait really belongs in cranelift-reader where it is used by the `.clif` file parser, but since +// it critically depends on the `opcode_name()` function which is needed here anyway, it lives in +// this module. This also saves us from running the build script twice to generate code for the two +// separate crates. +impl FromStr for Opcode { + type Err = &'static str; + + /// Parse an Opcode name from a string. + fn from_str(s: &str) -> Result { + use crate::constant_hash::{probe, simple_hash}; + + match probe::<&str, [Option]>(&OPCODE_HASH_TABLE, s, simple_hash(s)) { + Err(_) => Err("Unknown opcode"), + // We unwrap here because probe() should have ensured that the entry + // at this index is not None. 
+ Ok(i) => Ok(OPCODE_HASH_TABLE[i].unwrap()), + } + } +} + +impl<'a> Table<&'a str> for [Option] { + fn len(&self) -> usize { + self.len() + } + + fn key(&self, idx: usize) -> Option<&'a str> { + self[idx].map(opcode_name) + } +} + +/// A variable list of `Value` operands used for function call arguments and passing arguments to +/// basic blocks. +#[derive(Clone, Debug)] +pub struct VariableArgs(Vec); + +impl VariableArgs { + /// Create an empty argument list. + pub fn new() -> Self { + Self(Vec::new()) + } + + /// Add an argument to the end. + pub fn push(&mut self, v: Value) { + self.0.push(v) + } + + /// Check if the list is empty. + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + + /// Convert this to a value list in `pool` with `fixed` prepended. + pub fn into_value_list(self, fixed: &[Value], pool: &mut ValueListPool) -> ValueList { + let mut vlist = ValueList::default(); + vlist.extend(fixed.iter().cloned(), pool); + vlist.extend(self.0, pool); + vlist + } +} + +// Coerce `VariableArgs` into a `&[Value]` slice. +impl Deref for VariableArgs { + type Target = [Value]; + + fn deref(&self) -> &[Value] { + &self.0 + } +} + +impl DerefMut for VariableArgs { + fn deref_mut(&mut self) -> &mut [Value] { + &mut self.0 + } +} + +impl Display for VariableArgs { + fn fmt(&self, fmt: &mut Formatter) -> fmt::Result { + for (i, val) in self.0.iter().enumerate() { + if i == 0 { + write!(fmt, "{val}")?; + } else { + write!(fmt, ", {val}")?; + } + } + Ok(()) + } +} + +impl Default for VariableArgs { + fn default() -> Self { + Self::new() + } +} + +/// Analyzing an instruction. +/// +/// Avoid large matches on instruction formats by using the methods defined here to examine +/// instructions. +impl InstructionData { + /// Get the destinations of this instruction, if it's a branch. + /// + /// `br_table` returns the empty slice. + pub fn branch_destination<'a>(&'a self, jump_tables: &'a ir::JumpTables) -> &'a [BlockCall] { + match self { + Self::Jump { destination, .. } => std::slice::from_ref(destination), + Self::Brif { blocks, .. } => blocks.as_slice(), + Self::BranchTable { table, .. } => jump_tables.get(*table).unwrap().all_branches(), + _ => { + debug_assert!(!self.opcode().is_branch()); + &[] + } + } + } + + /// Get a mutable slice of the destinations of this instruction, if it's a branch. + /// + /// `br_table` returns the empty slice. + pub fn branch_destination_mut<'a>( + &'a mut self, + jump_tables: &'a mut ir::JumpTables, + ) -> &'a mut [BlockCall] { + match self { + Self::Jump { destination, .. } => std::slice::from_mut(destination), + Self::Brif { blocks, .. } => blocks.as_mut_slice(), + Self::BranchTable { table, .. } => { + jump_tables.get_mut(*table).unwrap().all_branches_mut() + } + _ => { + debug_assert!(!self.opcode().is_branch()); + &mut [] + } + } + } + + /// Replace the values used in this instruction according to the given + /// function. + pub fn map_values( + &mut self, + pool: &mut ValueListPool, + jump_tables: &mut ir::JumpTables, + mut f: impl FnMut(Value) -> Value, + ) { + for arg in self.arguments_mut(pool) { + *arg = f(*arg); + } + + for block in self.branch_destination_mut(jump_tables) { + for arg in block.args_slice_mut(pool) { + *arg = f(*arg); + } + } + } + + /// If this is a trapping instruction, get its trap code. Otherwise, return + /// `None`. + pub fn trap_code(&self) -> Option { + match *self { + Self::CondTrap { code, .. } | Self::Trap { code, .. 
} => Some(code), + _ => None, + } + } + + /// If this is a control-flow instruction depending on an integer condition, gets its + /// condition. Otherwise, return `None`. + pub fn cond_code(&self) -> Option { + match self { + &InstructionData::IntCompare { cond, .. } + | &InstructionData::IntCompareImm { cond, .. } => Some(cond), + _ => None, + } + } + + /// If this is a control-flow instruction depending on a floating-point condition, gets its + /// condition. Otherwise, return `None`. + pub fn fp_cond_code(&self) -> Option { + match self { + &InstructionData::FloatCompare { cond, .. } => Some(cond), + _ => None, + } + } + + /// If this is a trapping instruction, get an exclusive reference to its + /// trap code. Otherwise, return `None`. + pub fn trap_code_mut(&mut self) -> Option<&mut TrapCode> { + match self { + Self::CondTrap { code, .. } | Self::Trap { code, .. } => Some(code), + _ => None, + } + } + + /// If this is an atomic read/modify/write instruction, return its subopcode. + pub fn atomic_rmw_op(&self) -> Option { + match self { + &InstructionData::AtomicRmw { op, .. } => Some(op), + _ => None, + } + } + + /// If this is a load/store instruction, returns its immediate offset. + pub fn load_store_offset(&self) -> Option { + match self { + &InstructionData::Load { offset, .. } + | &InstructionData::StackLoad { offset, .. } + | &InstructionData::Store { offset, .. } + | &InstructionData::StackStore { offset, .. } => Some(offset.into()), + _ => None, + } + } + + /// If this is a load/store instruction, return its memory flags. + pub fn memflags(&self) -> Option { + match self { + &InstructionData::Load { flags, .. } + | &InstructionData::LoadNoOffset { flags, .. } + | &InstructionData::Store { flags, .. } + | &InstructionData::StoreNoOffset { flags, .. } + | &InstructionData::AtomicCas { flags, .. } + | &InstructionData::AtomicRmw { flags, .. } => Some(flags), + _ => None, + } + } + + /// If this instruction references a stack slot, return it + pub fn stack_slot(&self) -> Option { + match self { + &InstructionData::StackStore { stack_slot, .. } + | &InstructionData::StackLoad { stack_slot, .. } => Some(stack_slot), + _ => None, + } + } + + /// Return information about a call instruction. + /// + /// Any instruction that can call another function reveals its call signature here. + pub fn analyze_call<'a>(&'a self, pool: &'a ValueListPool) -> CallInfo<'a> { + match *self { + Self::Call { + func_ref, ref args, .. + } => CallInfo::Direct(func_ref, args.as_slice(pool)), + Self::CallIndirect { + sig_ref, ref args, .. + } => CallInfo::Indirect(sig_ref, &args.as_slice(pool)[1..]), + Self::Ternary { + opcode: Opcode::StackSwitch, + .. + } => { + // `StackSwitch` is not actually a call, but has the .call() side + // effect as it continues execution elsewhere. 
+ CallInfo::NotACall + } + _ => { + debug_assert!(!self.opcode().is_call()); + CallInfo::NotACall + } + } + } + + #[inline] + pub(crate) fn mask_immediates(&mut self, ctrl_typevar: Type) { + if ctrl_typevar.is_invalid() { + return; + } + + let bit_width = ctrl_typevar.bits(); + + match self { + Self::UnaryImm { opcode: _, imm } => { + *imm = imm.mask_to_width(bit_width); + } + Self::BinaryImm64 { + opcode, + arg: _, + imm, + } => { + if *opcode == Opcode::SdivImm || *opcode == Opcode::SremImm { + *imm = imm.mask_to_width(bit_width); + } + } + Self::IntCompareImm { + opcode, + arg: _, + cond, + imm, + } => { + debug_assert_eq!(*opcode, Opcode::IcmpImm); + if cond.unsigned() != *cond { + *imm = imm.mask_to_width(bit_width); + } + } + _ => {} + } + } +} + +/// Information about call instructions. +pub enum CallInfo<'a> { + /// This is not a call instruction. + NotACall, + + /// This is a direct call to an external function declared in the preamble. See + /// `DataFlowGraph.ext_funcs`. + Direct(FuncRef, &'a [Value]), + + /// This is an indirect call with the specified signature. See `DataFlowGraph.signatures`. + Indirect(SigRef, &'a [Value]), +} + +/// Value type constraints for a given opcode. +/// +/// The `InstructionFormat` determines the constraints on most operands, but `Value` operands and +/// results are not determined by the format. Every `Opcode` has an associated +/// `OpcodeConstraints` object that provides the missing details. +#[derive(Clone, Copy)] +pub struct OpcodeConstraints { + /// Flags for this opcode encoded as a bit field: + /// + /// Bits 0-2: + /// Number of fixed result values. This does not include `variable_args` results as are + /// produced by call instructions. + /// + /// Bit 3: + /// This opcode is polymorphic and the controlling type variable can be inferred from the + /// designated input operand. This is the `typevar_operand` index given to the + /// `InstructionFormat` meta language object. When this bit is not set, the controlling + /// type variable must be the first output value instead. + /// + /// Bit 4: + /// This opcode is polymorphic and the controlling type variable does *not* appear as the + /// first result type. + /// + /// Bits 5-7: + /// Number of fixed value arguments. The minimum required number of value operands. + flags: u8, + + /// Permitted set of types for the controlling type variable as an index into `TYPE_SETS`. + typeset_offset: u8, + + /// Offset into `OPERAND_CONSTRAINT` table of the descriptors for this opcode. The first + /// `num_fixed_results()` entries describe the result constraints, then follows constraints for + /// the fixed `Value` input operands. (`num_fixed_value_arguments()` of them). + constraint_offset: u16, +} + +impl OpcodeConstraints { + /// Can the controlling type variable for this opcode be inferred from the designated value + /// input operand? + /// This also implies that this opcode is polymorphic. + pub fn use_typevar_operand(self) -> bool { + (self.flags & 0x8) != 0 + } + + /// Is it necessary to look at the designated value input operand in order to determine the + /// controlling type variable, or is it good enough to use the first return type? + /// + /// Most polymorphic instructions produce a single result with the type of the controlling type + /// variable. A few polymorphic instructions either don't produce any results, or produce + /// results with a fixed type. These instructions return `true`. 
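+    // Worked example with a hypothetical flags byte of 0b0010_1001: bits 0-2 give one fixed
+    // result, bit 3 (set) says the controlling type variable is inferred from the designated
+    // operand, bit 4 (clear) says the first result type carries the controlling type, and
+    // bits 5-7 give one fixed value argument, matching the accessors below.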
+ pub fn requires_typevar_operand(self) -> bool { + (self.flags & 0x10) != 0 + } + + /// Get the number of *fixed* result values produced by this opcode. + /// This does not include `variable_args` produced by calls. + pub fn num_fixed_results(self) -> usize { + (self.flags & 0x7) as usize + } + + /// Get the number of *fixed* input values required by this opcode. + /// + /// This does not include `variable_args` arguments on call and branch instructions. + /// + /// The number of fixed input values is usually implied by the instruction format, but + /// instruction formats that use a `ValueList` put both fixed and variable arguments in the + /// list. This method returns the *minimum* number of values required in the value list. + pub fn num_fixed_value_arguments(self) -> usize { + ((self.flags >> 5) & 0x7) as usize + } + + /// Get the offset into `TYPE_SETS` for the controlling type variable. + /// Returns `None` if the instruction is not polymorphic. + fn typeset_offset(self) -> Option { + let offset = usize::from(self.typeset_offset); + if offset < TYPE_SETS.len() { + Some(offset) + } else { + None + } + } + + /// Get the offset into OPERAND_CONSTRAINTS where the descriptors for this opcode begin. + fn constraint_offset(self) -> usize { + self.constraint_offset as usize + } + + /// Get the value type of result number `n`, having resolved the controlling type variable to + /// `ctrl_type`. + pub fn result_type(self, n: usize, ctrl_type: Type) -> Type { + debug_assert!(n < self.num_fixed_results(), "Invalid result index"); + match OPERAND_CONSTRAINTS[self.constraint_offset() + n].resolve(ctrl_type) { + ResolvedConstraint::Bound(t) => t, + ResolvedConstraint::Free(ts) => panic!("Result constraints can't be free: {ts:?}"), + } + } + + /// Get the value type of input value number `n`, having resolved the controlling type variable + /// to `ctrl_type`. + /// + /// Unlike results, it is possible for some input values to vary freely within a specific + /// `ValueTypeSet`. This is represented with the `ArgumentConstraint::Free` variant. + pub fn value_argument_constraint(self, n: usize, ctrl_type: Type) -> ResolvedConstraint { + debug_assert!( + n < self.num_fixed_value_arguments(), + "Invalid value argument index" + ); + let offset = self.constraint_offset() + self.num_fixed_results(); + OPERAND_CONSTRAINTS[offset + n].resolve(ctrl_type) + } + + /// Get the typeset of allowed types for the controlling type variable in a polymorphic + /// instruction. + pub fn ctrl_typeset(self) -> Option { + self.typeset_offset().map(|offset| TYPE_SETS[offset]) + } + + /// Is this instruction polymorphic? + pub fn is_polymorphic(self) -> bool { + self.ctrl_typeset().is_some() + } +} + +type BitSet8 = ScalarBitSet; +type BitSet16 = ScalarBitSet; + +/// A value type set describes the permitted set of types for a type variable. +#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)] +pub struct ValueTypeSet { + /// Allowed lane sizes + pub lanes: BitSet16, + /// Allowed int widths + pub ints: BitSet8, + /// Allowed float widths + pub floats: BitSet8, + /// Allowed dynamic vectors minimum lane sizes + pub dynamic_lanes: BitSet16, +} + +impl ValueTypeSet { + /// Is `scalar` part of the base type set? + /// + /// Note that the base type set does not have to be included in the type set proper. 
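+    // The sets are indexed by log2 of the width: a 32-bit scalar has log2_lane_bits() == 5,
+    // so it is a member exactly when bit 5 of `ints` (or `floats`) is set; `lanes` likewise
+    // stores log2 of the allowed lane counts.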
+ fn is_base_type(self, scalar: Type) -> bool { + let l2b = u8::try_from(scalar.log2_lane_bits()).unwrap(); + if scalar.is_int() { + self.ints.contains(l2b) + } else if scalar.is_float() { + self.floats.contains(l2b) + } else { + false + } + } + + /// Does `typ` belong to this set? + pub fn contains(self, typ: Type) -> bool { + if typ.is_dynamic_vector() { + let l2l = u8::try_from(typ.log2_min_lane_count()).unwrap(); + self.dynamic_lanes.contains(l2l) && self.is_base_type(typ.lane_type()) + } else { + let l2l = u8::try_from(typ.log2_lane_count()).unwrap(); + self.lanes.contains(l2l) && self.is_base_type(typ.lane_type()) + } + } + + /// Get an example member of this type set. + /// + /// This is used for error messages to avoid suggesting invalid types. + pub fn example(self) -> Type { + let t = if self.ints.max().unwrap_or(0) > 5 { + types::I32 + } else if self.floats.max().unwrap_or(0) > 5 { + types::F32 + } else { + types::I8 + }; + t.by(1 << self.lanes.min().unwrap()).unwrap() + } +} + +/// Operand constraints. This describes the value type constraints on a single `Value` operand. +enum OperandConstraint { + /// This operand has a concrete value type. + Concrete(Type), + + /// This operand can vary freely within the given type set. + /// The type set is identified by its index into the TYPE_SETS constant table. + Free(u8), + + /// This operand is the same type as the controlling type variable. + Same, + + /// This operand is `ctrlType.lane_of()`. + LaneOf, + + /// This operand is `ctrlType.as_truthy()`. + AsTruthy, + + /// This operand is `ctrlType.half_width()`. + HalfWidth, + + /// This operand is `ctrlType.double_width()`. + DoubleWidth, + + /// This operand is `ctrlType.split_lanes()`. + SplitLanes, + + /// This operand is `ctrlType.merge_lanes()`. + MergeLanes, + + /// This operands is `ctrlType.dynamic_to_vector()`. + DynamicToVector, + + /// This operand is `ctrlType.narrower()`. + Narrower, + + /// This operand is `ctrlType.wider()`. + Wider, +} + +impl OperandConstraint { + /// Resolve this operand constraint into a concrete value type, given the value of the + /// controlling type variable. 
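+    // For instance, with a controlling type of I32, `Same` resolves to `Bound(I32)` and
+    // `HalfWidth` presumably to `Bound(I16)`, while `Free(n)` just returns the n-th entry
+    // of `TYPE_SETS` as a free constraint.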
+ pub fn resolve(&self, ctrl_type: Type) -> ResolvedConstraint { + use self::OperandConstraint::*; + use self::ResolvedConstraint::Bound; + match *self { + Concrete(t) => Bound(t), + Free(vts) => ResolvedConstraint::Free(TYPE_SETS[vts as usize]), + Same => Bound(ctrl_type), + LaneOf => Bound(ctrl_type.lane_of()), + AsTruthy => Bound(ctrl_type.as_truthy()), + HalfWidth => Bound(ctrl_type.half_width().expect("invalid type for half_width")), + DoubleWidth => Bound( + ctrl_type + .double_width() + .expect("invalid type for double_width"), + ), + SplitLanes => { + if ctrl_type.is_dynamic_vector() { + Bound( + ctrl_type + .dynamic_to_vector() + .expect("invalid type for dynamic_to_vector") + .split_lanes() + .expect("invalid type for split_lanes") + .vector_to_dynamic() + .expect("invalid dynamic type"), + ) + } else { + Bound( + ctrl_type + .split_lanes() + .expect("invalid type for split_lanes"), + ) + } + } + MergeLanes => { + if ctrl_type.is_dynamic_vector() { + Bound( + ctrl_type + .dynamic_to_vector() + .expect("invalid type for dynamic_to_vector") + .merge_lanes() + .expect("invalid type for merge_lanes") + .vector_to_dynamic() + .expect("invalid dynamic type"), + ) + } else { + Bound( + ctrl_type + .merge_lanes() + .expect("invalid type for merge_lanes"), + ) + } + } + DynamicToVector => Bound( + ctrl_type + .dynamic_to_vector() + .expect("invalid type for dynamic_to_vector"), + ), + Narrower => { + let ctrl_type_bits = ctrl_type.log2_lane_bits(); + let mut tys = ValueTypeSet::default(); + + // We're testing scalar values, only. + tys.lanes = ScalarBitSet::from_range(0, 1); + + if ctrl_type.is_int() { + // The upper bound in from_range is exclusive, and we want to exclude the + // control type to construct the interval of [I8, ctrl_type). + tys.ints = BitSet8::from_range(3, ctrl_type_bits as u8); + } else if ctrl_type.is_float() { + // The upper bound in from_range is exclusive, and we want to exclude the + // control type to construct the interval of [F16, ctrl_type). + tys.floats = BitSet8::from_range(4, ctrl_type_bits as u8); + } else { + panic!("The Narrower constraint only operates on floats or ints, got {ctrl_type:?}"); + } + ResolvedConstraint::Free(tys) + } + Wider => { + let ctrl_type_bits = ctrl_type.log2_lane_bits(); + let mut tys = ValueTypeSet::default(); + + // We're testing scalar values, only. + tys.lanes = ScalarBitSet::from_range(0, 1); + + if ctrl_type.is_int() { + let lower_bound = ctrl_type_bits as u8 + 1; + // The largest integer type we can represent in `BitSet8` is I128, which is + // represented by bit 7 in the bit set. Adding one to exclude I128 from the + // lower bound would overflow as 2^8 doesn't fit in a u8, but this would + // already describe the empty set so instead we leave `ints` in its default + // empty state. + if lower_bound < BitSet8::capacity() { + // The interval should include all types wider than `ctrl_type`, so we use + // `2^8` as the upper bound, and add one to the bits of `ctrl_type` to define + // the interval `(ctrl_type, I128]`. + tys.ints = BitSet8::from_range(lower_bound, 8); + } + } else if ctrl_type.is_float() { + // Same as above but for `tys.floats`, as the largest float type is F128. 
+                    let lower_bound = ctrl_type_bits as u8 + 1;
+                    if lower_bound < BitSet8::capacity() {
+                        tys.floats = BitSet8::from_range(lower_bound, 8);
+                    }
+                } else {
+                    panic!(
+                        "The Wider constraint only operates on floats or ints, got {ctrl_type:?}"
+                    );
+                }
+
+                ResolvedConstraint::Free(tys)
+            }
+        }
+    }
+}
+
+/// The type constraint on a value argument once the controlling type variable is known.
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub enum ResolvedConstraint {
+    /// The operand is bound to a known type.
+    Bound(Type),
+    /// The operand type can vary freely within the given set.
+    Free(ValueTypeSet),
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use alloc::string::ToString;
+
+    #[test]
+    fn inst_data_is_copy() {
+        fn is_copy<T: Copy>() {}
+        is_copy::<InstructionData>();
+    }
+
+    #[test]
+    fn inst_data_size() {
+        // The size of `InstructionData` is performance sensitive, so make sure
+        // we don't regress it unintentionally.
+        assert_eq!(std::mem::size_of::<InstructionData>(), 16);
+    }
+
+    #[test]
+    fn opcodes() {
+        use core::mem;
+
+        let x = Opcode::Iadd;
+        let mut y = Opcode::Isub;
+
+        assert!(x != y);
+        y = Opcode::Iadd;
+        assert_eq!(x, y);
+        assert_eq!(x.format(), InstructionFormat::Binary);
+
+        assert_eq!(format!("{:?}", Opcode::IaddImm), "IaddImm");
+        assert_eq!(Opcode::IaddImm.to_string(), "iadd_imm");
+
+        // Check the matcher.
+        assert_eq!("iadd".parse::<Opcode>(), Ok(Opcode::Iadd));
+        assert_eq!("iadd_imm".parse::<Opcode>(), Ok(Opcode::IaddImm));
+        assert_eq!("iadd\0".parse::<Opcode>(), Err("Unknown opcode"));
+        assert_eq!("".parse::<Opcode>(), Err("Unknown opcode"));
+        assert_eq!("\0".parse::<Opcode>(), Err("Unknown opcode"));
+
+        // Opcode is a single byte, and because Option<Opcode> originally came to 2 bytes, early on
+        // Opcode included a variant NotAnOpcode to avoid the unnecessary bloat. Since then the Rust
+        // compiler has brought in NonZero optimization, meaning that an enum not using the 0 value
+        // can be optional for no size cost. We want to ensure Option<Opcode> remains small.
+        assert_eq!(mem::size_of::<Opcode>(), mem::size_of::<Option<Opcode>>());
+    }
+
+    #[test]
+    fn instruction_data() {
+        use core::mem;
+        // The size of the `InstructionData` enum is important for performance. It should not
+        // exceed 16 bytes. Use `Box` out-of-line payloads for instruction formats that
+        // require more space than that. It would be fine with a data structure smaller than 16
+        // bytes, but what are the odds of that?
+ assert_eq!(mem::size_of::(), 16); + } + + #[test] + fn constraints() { + let a = Opcode::Iadd.constraints(); + assert!(a.use_typevar_operand()); + assert!(!a.requires_typevar_operand()); + assert_eq!(a.num_fixed_results(), 1); + assert_eq!(a.num_fixed_value_arguments(), 2); + assert_eq!(a.result_type(0, types::I32), types::I32); + assert_eq!(a.result_type(0, types::I8), types::I8); + assert_eq!( + a.value_argument_constraint(0, types::I32), + ResolvedConstraint::Bound(types::I32) + ); + assert_eq!( + a.value_argument_constraint(1, types::I32), + ResolvedConstraint::Bound(types::I32) + ); + + let b = Opcode::Bitcast.constraints(); + assert!(!b.use_typevar_operand()); + assert!(!b.requires_typevar_operand()); + assert_eq!(b.num_fixed_results(), 1); + assert_eq!(b.num_fixed_value_arguments(), 1); + assert_eq!(b.result_type(0, types::I32), types::I32); + assert_eq!(b.result_type(0, types::I8), types::I8); + match b.value_argument_constraint(0, types::I32) { + ResolvedConstraint::Free(vts) => assert!(vts.contains(types::F32)), + _ => panic!("Unexpected constraint from value_argument_constraint"), + } + + let c = Opcode::Call.constraints(); + assert_eq!(c.num_fixed_results(), 0); + assert_eq!(c.num_fixed_value_arguments(), 0); + + let i = Opcode::CallIndirect.constraints(); + assert_eq!(i.num_fixed_results(), 0); + assert_eq!(i.num_fixed_value_arguments(), 1); + + let cmp = Opcode::Icmp.constraints(); + assert!(cmp.use_typevar_operand()); + assert!(cmp.requires_typevar_operand()); + assert_eq!(cmp.num_fixed_results(), 1); + assert_eq!(cmp.num_fixed_value_arguments(), 2); + assert_eq!(cmp.result_type(0, types::I64), types::I8); + } + + #[test] + fn value_set() { + use crate::ir::types::*; + + let vts = ValueTypeSet { + lanes: BitSet16::from_range(0, 8), + ints: BitSet8::from_range(4, 7), + floats: BitSet8::from_range(0, 0), + dynamic_lanes: BitSet16::from_range(0, 4), + }; + assert!(!vts.contains(I8)); + assert!(vts.contains(I32)); + assert!(vts.contains(I64)); + assert!(vts.contains(I32X4)); + assert!(vts.contains(I32X4XN)); + assert!(!vts.contains(F16)); + assert!(!vts.contains(F32)); + assert!(!vts.contains(F128)); + assert_eq!(vts.example().to_string(), "i32"); + + let vts = ValueTypeSet { + lanes: BitSet16::from_range(0, 8), + ints: BitSet8::from_range(0, 0), + floats: BitSet8::from_range(5, 7), + dynamic_lanes: BitSet16::from_range(0, 8), + }; + assert_eq!(vts.example().to_string(), "f32"); + + let vts = ValueTypeSet { + lanes: BitSet16::from_range(1, 8), + ints: BitSet8::from_range(0, 0), + floats: BitSet8::from_range(5, 7), + dynamic_lanes: BitSet16::from_range(0, 8), + }; + assert_eq!(vts.example().to_string(), "f32x2"); + + let vts = ValueTypeSet { + lanes: BitSet16::from_range(2, 8), + ints: BitSet8::from_range(3, 7), + floats: BitSet8::from_range(0, 0), + dynamic_lanes: BitSet16::from_range(0, 8), + }; + assert_eq!(vts.example().to_string(), "i32x4"); + + let vts = ValueTypeSet { + // TypeSet(lanes=(1, 256), ints=(8, 64)) + lanes: BitSet16::from_range(0, 9), + ints: BitSet8::from_range(3, 7), + floats: BitSet8::from_range(0, 0), + dynamic_lanes: BitSet16::from_range(0, 8), + }; + assert!(vts.contains(I32)); + assert!(vts.contains(I32X4)); + } +} diff --git a/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/jumptable.rs b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/jumptable.rs new file mode 100644 index 000000000..8e1e15c7d --- /dev/null +++ b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/jumptable.rs @@ -0,0 +1,168 @@ +//! 
Jump table representation. +//! +//! Jump tables are declared in the preamble and assigned an `ir::entities::JumpTable` reference. +//! The actual table of destinations is stored in a `JumpTableData` struct defined in this module. + +use crate::ir::instructions::ValueListPool; +use crate::ir::BlockCall; +use alloc::vec::Vec; +use core::fmt::{self, Display, Formatter}; +use core::slice::{Iter, IterMut}; + +#[cfg(feature = "enable-serde")] +use serde_derive::{Deserialize, Serialize}; + +/// Contents of a jump table. +/// +/// All jump tables use 0-based indexing and are densely populated. +/// +/// The default block for the jump table is stored as the first element of the underlying vector. +/// It can be accessed through the `default_block` and `default_block_mut` functions. All blocks +/// may be iterated using the `all_branches` and `all_branches_mut` functions, which will both +/// iterate over the default block first. +#[derive(Debug, Clone, PartialEq, Hash)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub struct JumpTableData { + // Table entries. + table: Vec, +} + +impl JumpTableData { + /// Create a new jump table with the provided blocks. + pub fn new(def: BlockCall, table: &[BlockCall]) -> Self { + Self { + table: std::iter::once(def).chain(table.iter().copied()).collect(), + } + } + + /// Fetch the default block for this jump table. + pub fn default_block(&self) -> BlockCall { + *self.table.first().unwrap() + } + + /// Mutable access to the default block of this jump table. + pub fn default_block_mut(&mut self) -> &mut BlockCall { + self.table.first_mut().unwrap() + } + + /// The jump table and default block as a single slice. The default block will always be first. + pub fn all_branches(&self) -> &[BlockCall] { + self.table.as_slice() + } + + /// The jump table and default block as a single mutable slice. The default block will always + /// be first. + pub fn all_branches_mut(&mut self) -> &mut [BlockCall] { + self.table.as_mut_slice() + } + + /// Access the jump table as a slice. This excludes the default block. + pub fn as_slice(&self) -> &[BlockCall] { + &self.table.as_slice()[1..] + } + + /// Access the jump table as a mutable slice. This excludes the default block. + pub fn as_mut_slice(&mut self) -> &mut [BlockCall] { + &mut self.table.as_mut_slice()[1..] + } + + /// Returns an iterator to the jump table, excluding the default block. + #[deprecated(since = "7.0.0", note = "please use `.as_slice()` instead")] + pub fn iter(&self) -> Iter { + self.as_slice().iter() + } + + /// Returns an iterator that allows modifying each value, excluding the default block. + #[deprecated(since = "7.0.0", note = "please use `.as_mut_slice()` instead")] + pub fn iter_mut(&mut self) -> IterMut { + self.as_mut_slice().iter_mut() + } + + /// Clears all entries in this jump table, except for the default block. + pub fn clear(&mut self) { + self.table.drain(1..); + } + + /// Return a value that can display the contents of this jump table. + pub fn display<'a>(&'a self, pool: &'a ValueListPool) -> DisplayJumpTable<'a> { + DisplayJumpTable { jt: self, pool } + } +} + +/// A wrapper for the context required to display a [JumpTableData]. 
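The default-block-first vector layout described above can be mimicked with a small standalone type. `TinyJumpTable` below is hypothetical and only mirrors the vector layout, not the real `JumpTableData` API with `BlockCall` and value list pools:

```rust
// Hypothetical sketch: the default target lives at element 0 of a single
// vector, so the full branch list is the whole vector and the indexed table is
// the tail slice, as in `JumpTableData` above.
struct TinyJumpTable {
    table: Vec<u32>, // element 0 = default target, rest = indexed targets
}

impl TinyJumpTable {
    fn new(default: u32, targets: &[u32]) -> Self {
        Self {
            table: std::iter::once(default).chain(targets.iter().copied()).collect(),
        }
    }
    fn default_target(&self) -> u32 {
        self.table[0]
    }
    fn as_slice(&self) -> &[u32] {
        &self.table[1..]
    }
}

fn main() {
    let jt = TinyJumpTable::new(9, &[1, 2, 3]);
    assert_eq!(jt.default_target(), 9);
    assert_eq!(jt.as_slice(), &[1, 2, 3]);
}
```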
+pub struct DisplayJumpTable<'a> { + jt: &'a JumpTableData, + pool: &'a ValueListPool, +} + +impl<'a> Display for DisplayJumpTable<'a> { + fn fmt(&self, fmt: &mut Formatter<'_>) -> fmt::Result { + write!(fmt, "{}, [", self.jt.default_block().display(self.pool))?; + if let Some((first, rest)) = self.jt.as_slice().split_first() { + write!(fmt, "{}", first.display(self.pool))?; + for block in rest { + write!(fmt, ", {}", block.display(self.pool))?; + } + } + write!(fmt, "]") + } +} + +#[cfg(test)] +mod tests { + use super::JumpTableData; + use crate::entity::EntityRef; + use crate::ir::instructions::ValueListPool; + use crate::ir::{Block, BlockCall, Value}; + use std::string::ToString; + + #[test] + fn empty() { + let mut pool = ValueListPool::default(); + let def = BlockCall::new(Block::new(0), &[], &mut pool); + + let jt = JumpTableData::new(def, &[]); + + assert_eq!(jt.all_branches().get(0), Some(&def)); + + assert_eq!(jt.as_slice().get(0), None); + assert_eq!(jt.as_slice().get(10), None); + + assert_eq!(jt.display(&pool).to_string(), "block0, []"); + + assert_eq!(jt.all_branches(), [def]); + assert_eq!(jt.as_slice(), []); + } + + #[test] + fn insert() { + let mut pool = ValueListPool::default(); + + let v0 = Value::new(0); + let v1 = Value::new(1); + + let e0 = Block::new(0); + let e1 = Block::new(1); + let e2 = Block::new(2); + + let def = BlockCall::new(e0, &[], &mut pool); + let b1 = BlockCall::new(e1, &[v0], &mut pool); + let b2 = BlockCall::new(e2, &[], &mut pool); + let b3 = BlockCall::new(e1, &[v1], &mut pool); + + let jt = JumpTableData::new(def, &[b1, b2, b3]); + + assert_eq!(jt.default_block(), def); + assert_eq!( + jt.display(&pool).to_string(), + "block0, [block1(v0), block2, block1(v1)]" + ); + + assert_eq!(jt.all_branches(), [def, b1, b2, b3]); + assert_eq!(jt.as_slice(), [b1, b2, b3]); + + assert_eq!(jt.as_slice()[0].args_slice(&pool), [v0]); + assert_eq!(jt.as_slice()[1].args_slice(&pool), []); + assert_eq!(jt.as_slice()[2].args_slice(&pool), [v1]); + } +} diff --git a/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/known_symbol.rs b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/known_symbol.rs new file mode 100644 index 000000000..c3d17501a --- /dev/null +++ b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/known_symbol.rs @@ -0,0 +1,47 @@ +use core::fmt; +use core::str::FromStr; +#[cfg(feature = "enable-serde")] +use serde_derive::{Deserialize, Serialize}; + +/// A well-known symbol. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub enum KnownSymbol { + /// ELF well-known linker symbol _GLOBAL_OFFSET_TABLE_ + ElfGlobalOffsetTable, + /// TLS index symbol for the current thread. + /// Used in COFF/PE file formats. 
+ CoffTlsIndex, +} + +impl fmt::Display for KnownSymbol { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Debug::fmt(self, f) + } +} + +impl FromStr for KnownSymbol { + type Err = (); + + fn from_str(s: &str) -> Result { + match s { + "ElfGlobalOffsetTable" => Ok(Self::ElfGlobalOffsetTable), + "CoffTlsIndex" => Ok(Self::CoffTlsIndex), + _ => Err(()), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parsing() { + assert_eq!( + "ElfGlobalOffsetTable".parse(), + Ok(KnownSymbol::ElfGlobalOffsetTable) + ); + assert_eq!("CoffTlsIndex".parse(), Ok(KnownSymbol::CoffTlsIndex)); + } +} diff --git a/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/layout.rs b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/layout.rs new file mode 100644 index 000000000..4e6ca774c --- /dev/null +++ b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/layout.rs @@ -0,0 +1,1193 @@ +//! Function layout. +//! +//! The order of basic blocks in a function and the order of instructions in a block is +//! determined by the `Layout` data structure defined in this module. + +use crate::entity::SecondaryMap; +use crate::ir::progpoint::ProgramPoint; +use crate::ir::{Block, Inst}; +use crate::packed_option::PackedOption; +use crate::{timing, trace}; +use core::cmp; + +/// The `Layout` struct determines the layout of blocks and instructions in a function. It does not +/// contain definitions of instructions or blocks, but depends on `Inst` and `Block` entity references +/// being defined elsewhere. +/// +/// This data structure determines: +/// +/// - The order of blocks in the function. +/// - Which block contains a given instruction. +/// - The order of instructions with a block. +/// +/// While data dependencies are not recorded, instruction ordering does affect control +/// dependencies, so part of the semantics of the program are determined by the layout. +/// +#[derive(Debug, Clone, PartialEq, Hash)] +pub struct Layout { + /// Linked list nodes for the layout order of blocks Forms a doubly linked list, terminated in + /// both ends by `None`. + blocks: SecondaryMap, + + /// Linked list nodes for the layout order of instructions. Forms a double linked list per block, + /// terminated in both ends by `None`. + insts: SecondaryMap, + + /// First block in the layout order, or `None` when no blocks have been laid out. + first_block: Option, + + /// Last block in the layout order, or `None` when no blocks have been laid out. + last_block: Option, +} + +impl Layout { + /// Create a new empty `Layout`. + pub fn new() -> Self { + Self { + blocks: SecondaryMap::new(), + insts: SecondaryMap::new(), + first_block: None, + last_block: None, + } + } + + /// Clear the layout. + pub fn clear(&mut self) { + self.blocks.clear(); + self.insts.clear(); + self.first_block = None; + self.last_block = None; + } + + /// Returns the capacity of the `BlockData` map. + pub fn block_capacity(&self) -> usize { + self.blocks.capacity() + } +} + +/// Sequence numbers. +/// +/// All instructions are given a sequence number that can be used to quickly determine +/// their relative position in a block. The sequence numbers are not contiguous, but are assigned +/// like line numbers in BASIC: 10, 20, 30, ... +/// +/// Sequence numbers are strictly increasing within a block, but are reset between blocks. +/// +/// The result is that sequence numbers work like BASIC line numbers for the textual form of the IR. 
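The BASIC-style numbering described above works because sequence numbers are handed out with gaps; a later insertion can usually take the midpoint of its neighbours and only falls back to renumbering when the gap is exhausted. A minimal standalone demonstration of that rule (the helper mirrors the `midpoint` function defined just below):

```rust
// Standalone sketch of the gap-based numbering: spaced-out numbers leave room
// for insertion, adjacent numbers force a renumbering pass.
fn midpoint(a: u32, b: u32) -> Option<u32> {
    debug_assert!(a < b);
    let m = a + (b - a) / 2;
    if m > a {
        Some(m)
    } else {
        None
    }
}

fn main() {
    // Numbers assigned with a stride of 10 leave room in between...
    assert_eq!(midpoint(10, 20), Some(15));
    // ...but adjacent numbers have no free slot, so a renumbering is needed.
    assert_eq!(midpoint(14, 15), None);
}
```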
+type SequenceNumber = u32; + +/// Initial stride assigned to new sequence numbers. +const MAJOR_STRIDE: SequenceNumber = 10; + +/// Secondary stride used when renumbering locally. +const MINOR_STRIDE: SequenceNumber = 2; + +/// Limit on the sequence number range we'll renumber locally. If this limit is exceeded, we'll +/// switch to a full block renumbering. +const LOCAL_LIMIT: SequenceNumber = 100 * MINOR_STRIDE; + +/// Compute the midpoint between `a` and `b`. +/// Return `None` if the midpoint would be equal to either. +fn midpoint(a: SequenceNumber, b: SequenceNumber) -> Option { + debug_assert!(a < b); + // Avoid integer overflow. + let m = a + (b - a) / 2; + if m > a { + Some(m) + } else { + None + } +} + +impl Layout { + /// Compare the program points `a` and `b` in the same block relative to this program order. + /// + /// Return `Less` if `a` appears in the program before `b`. + /// + /// This is declared as a generic such that it can be called with `Inst` and `Block` arguments + /// directly. Depending on the implementation, there is a good chance performance will be + /// improved for those cases where the type of either argument is known statically. + pub fn pp_cmp(&self, a: A, b: B) -> cmp::Ordering + where + A: Into, + B: Into, + { + let a = a.into(); + let b = b.into(); + debug_assert_eq!(self.pp_block(a), self.pp_block(b)); + let a_seq = match a { + ProgramPoint::Block(_block) => 0, + ProgramPoint::Inst(inst) => self.insts[inst].seq, + }; + let b_seq = match b { + ProgramPoint::Block(_block) => 0, + ProgramPoint::Inst(inst) => self.insts[inst].seq, + }; + a_seq.cmp(&b_seq) + } +} + +// Private methods for dealing with sequence numbers. +impl Layout { + /// Assign a valid sequence number to `inst` such that the numbers are still monotonic. This may + /// require renumbering. + fn assign_inst_seq(&mut self, inst: Inst) { + // Get the sequence number immediately before `inst`. + let prev_seq = match self.insts[inst].prev.expand() { + Some(prev_inst) => self.insts[prev_inst].seq, + None => 0, + }; + + // Get the sequence number immediately following `inst`. + let next_seq = if let Some(next_inst) = self.insts[inst].next.expand() { + self.insts[next_inst].seq + } else { + // There is nothing after `inst`. We can just use a major stride. + self.insts[inst].seq = prev_seq + MAJOR_STRIDE; + return; + }; + + // Check if there is room between these sequence numbers. + if let Some(seq) = midpoint(prev_seq, next_seq) { + self.insts[inst].seq = seq; + } else { + // No available integers between `prev_seq` and `next_seq`. We have to renumber. + self.renumber_insts(inst, prev_seq + MINOR_STRIDE, prev_seq + LOCAL_LIMIT); + } + } + + /// Renumber instructions starting from `inst` until the end of the block or until numbers catch + /// up. + /// + /// If sequence numbers exceed `limit`, switch to a full block renumbering. + fn renumber_insts(&mut self, inst: Inst, seq: SequenceNumber, limit: SequenceNumber) { + let mut inst = inst; + let mut seq = seq; + + loop { + self.insts[inst].seq = seq; + + // Next instruction. + inst = match self.insts[inst].next.expand() { + None => return, + Some(next) => next, + }; + + if seq < self.insts[inst].seq { + // Sequence caught up. + return; + } + + if seq > limit { + // We're pushing too many instructions in front of us. + // Switch to a full block renumbering to make some space. 
+ self.full_block_renumber( + self.inst_block(inst) + .expect("inst must be inserted before assigning an seq"), + ); + return; + } + + seq += MINOR_STRIDE; + } + } + + /// Renumber all instructions in a block. + /// + /// This doesn't affect the position of anything, but it gives more room in the internal + /// sequence numbers for inserting instructions later. + fn full_block_renumber(&mut self, block: Block) { + let _tt = timing::layout_renumber(); + // Avoid 0 as this is reserved for the program point indicating the block itself + let mut seq = MAJOR_STRIDE; + let mut next_inst = self.blocks[block].first_inst.expand(); + while let Some(inst) = next_inst { + self.insts[inst].seq = seq; + seq += MAJOR_STRIDE; + next_inst = self.insts[inst].next.expand(); + } + + trace!("Renumbered {} program points", seq / MAJOR_STRIDE); + } +} + +/// Methods for laying out blocks. +/// +/// An unknown block starts out as *not inserted* in the block layout. The layout is a linear order of +/// inserted blocks. Once a block has been inserted in the layout, instructions can be added. A block +/// can only be removed from the layout when it is empty. +/// +/// Since every block must end with a terminator instruction which cannot fall through, the layout of +/// blocks do not affect the semantics of the program. +/// +impl Layout { + /// Is `block` currently part of the layout? + pub fn is_block_inserted(&self, block: Block) -> bool { + Some(block) == self.first_block || self.blocks[block].prev.is_some() + } + + /// Insert `block` as the last block in the layout. + pub fn append_block(&mut self, block: Block) { + debug_assert!( + !self.is_block_inserted(block), + "Cannot append block that is already in the layout" + ); + { + let node = &mut self.blocks[block]; + debug_assert!(node.first_inst.is_none() && node.last_inst.is_none()); + node.prev = self.last_block.into(); + node.next = None.into(); + } + if let Some(last) = self.last_block { + self.blocks[last].next = block.into(); + } else { + self.first_block = Some(block); + } + self.last_block = Some(block); + } + + /// Insert `block` in the layout before the existing block `before`. + pub fn insert_block(&mut self, block: Block, before: Block) { + debug_assert!( + !self.is_block_inserted(block), + "Cannot insert block that is already in the layout" + ); + debug_assert!( + self.is_block_inserted(before), + "block Insertion point not in the layout" + ); + let after = self.blocks[before].prev; + { + let node = &mut self.blocks[block]; + node.next = before.into(); + node.prev = after; + } + self.blocks[before].prev = block.into(); + match after.expand() { + None => self.first_block = Some(block), + Some(a) => self.blocks[a].next = block.into(), + } + } + + /// Insert `block` in the layout *after* the existing block `after`. + pub fn insert_block_after(&mut self, block: Block, after: Block) { + debug_assert!( + !self.is_block_inserted(block), + "Cannot insert block that is already in the layout" + ); + debug_assert!( + self.is_block_inserted(after), + "block Insertion point not in the layout" + ); + let before = self.blocks[after].next; + { + let node = &mut self.blocks[block]; + node.next = before; + node.prev = after.into(); + } + self.blocks[after].next = block.into(); + match before.expand() { + None => self.last_block = Some(block), + Some(b) => self.blocks[b].prev = block.into(), + } + } + + /// Remove `block` from the layout. 
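The linked-list bookkeeping used by `append_block` and the `insert_block*` methods above can be sketched standalone. `TinyList` below is hypothetical; it uses `Option<usize>` side tables where the real code uses `SecondaryMap` and `PackedOption`:

```rust
// Hypothetical sketch: a layout-style doubly linked list kept in side tables of
// prev/next links, with an append that patches links like `append_block`.
#[derive(Default)]
struct TinyList {
    prev: Vec<Option<usize>>,
    next: Vec<Option<usize>>,
    first: Option<usize>,
    last: Option<usize>,
}

impl TinyList {
    fn append(&mut self) -> usize {
        let n = self.prev.len();
        self.prev.push(self.last);
        self.next.push(None);
        match self.last {
            Some(old_last) => self.next[old_last] = Some(n),
            None => self.first = Some(n),
        }
        self.last = Some(n);
        n
    }
    fn in_order(&self) -> Vec<usize> {
        let mut out = Vec::new();
        let mut cur = self.first;
        while let Some(n) = cur {
            out.push(n);
            cur = self.next[n];
        }
        out
    }
}

fn main() {
    let mut list = TinyList::default();
    let a = list.append();
    let b = list.append();
    assert_eq!(list.in_order(), vec![a, b]);
    assert_eq!(list.prev[b], Some(a));
}
```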
+ pub fn remove_block(&mut self, block: Block) { + debug_assert!(self.is_block_inserted(block), "block not in the layout"); + debug_assert!(self.first_inst(block).is_none(), "block must be empty."); + + // Clear the `block` node and extract links. + let prev; + let next; + { + let n = &mut self.blocks[block]; + prev = n.prev; + next = n.next; + n.prev = None.into(); + n.next = None.into(); + } + // Fix up links to `block`. + match prev.expand() { + None => self.first_block = next.expand(), + Some(p) => self.blocks[p].next = next, + } + match next.expand() { + None => self.last_block = prev.expand(), + Some(n) => self.blocks[n].prev = prev, + } + } + + /// Return an iterator over all blocks in layout order. + pub fn blocks(&self) -> Blocks { + Blocks { + layout: self, + next: self.first_block, + } + } + + /// Get the function's entry block. + /// This is simply the first block in the layout order. + pub fn entry_block(&self) -> Option { + self.first_block + } + + /// Get the last block in the layout. + pub fn last_block(&self) -> Option { + self.last_block + } + + /// Get the block preceding `block` in the layout order. + pub fn prev_block(&self, block: Block) -> Option { + self.blocks[block].prev.expand() + } + + /// Get the block following `block` in the layout order. + pub fn next_block(&self, block: Block) -> Option { + self.blocks[block].next.expand() + } + + /// Mark a block as "cold". + /// + /// This will try to move it out of the ordinary path of execution + /// when lowered to machine code. + pub fn set_cold(&mut self, block: Block) { + self.blocks[block].cold = true; + } + + /// Is the given block cold? + pub fn is_cold(&self, block: Block) -> bool { + self.blocks[block].cold + } +} + +/// A single node in the linked-list of blocks. +// **Note:** Whenever you add new fields here, don't forget to update the custom serializer for `Layout` too. +#[derive(Clone, Debug, Default, PartialEq, Hash)] +struct BlockNode { + prev: PackedOption, + next: PackedOption, + first_inst: PackedOption, + last_inst: PackedOption, + cold: bool, +} + +/// Iterate over blocks in layout order. See [crate::ir::layout::Layout::blocks]. +pub struct Blocks<'f> { + layout: &'f Layout, + next: Option, +} + +impl<'f> Iterator for Blocks<'f> { + type Item = Block; + + fn next(&mut self) -> Option { + match self.next { + Some(block) => { + self.next = self.layout.next_block(block); + Some(block) + } + None => None, + } + } +} + +/// Use a layout reference in a for loop. +impl<'f> IntoIterator for &'f Layout { + type Item = Block; + type IntoIter = Blocks<'f>; + + fn into_iter(self) -> Blocks<'f> { + self.blocks() + } +} + +/// Methods for arranging instructions. +/// +/// An instruction starts out as *not inserted* in the layout. An instruction can be inserted into +/// a block at a given position. +impl Layout { + /// Get the block containing `inst`, or `None` if `inst` is not inserted in the layout. + pub fn inst_block(&self, inst: Inst) -> Option { + self.insts[inst].block.into() + } + + /// Get the block containing the program point `pp`. Panic if `pp` is not in the layout. + pub fn pp_block(&self, pp: ProgramPoint) -> Block { + match pp { + ProgramPoint::Block(block) => block, + ProgramPoint::Inst(inst) => self.inst_block(inst).expect("Program point not in layout"), + } + } + + /// Append `inst` to the end of `block`. 
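The `Blocks` iterator above simply follows `next` links until the chain ends. A standalone sketch of that link-following iteration, with hypothetical names and plain indices instead of entity references:

```rust
// Hypothetical sketch of an iterator that walks `next` links in layout order.
struct LinkIter<'a> {
    next_links: &'a [Option<usize>],
    cur: Option<usize>,
}

impl<'a> Iterator for LinkIter<'a> {
    type Item = usize;
    fn next(&mut self) -> Option<usize> {
        let item = self.cur?;
        self.cur = self.next_links[item];
        Some(item)
    }
}

fn main() {
    // A three-element chain laid out as 0 -> 2 -> 1.
    let next_links: [Option<usize>; 3] = [Some(2), None, Some(1)];
    let iter = LinkIter { next_links: &next_links, cur: Some(0) };
    assert_eq!(iter.collect::<Vec<_>>(), vec![0, 2, 1]);
}
```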
+ pub fn append_inst(&mut self, inst: Inst, block: Block) { + debug_assert_eq!(self.inst_block(inst), None); + debug_assert!( + self.is_block_inserted(block), + "Cannot append instructions to block not in layout" + ); + { + let block_node = &mut self.blocks[block]; + { + let inst_node = &mut self.insts[inst]; + inst_node.block = block.into(); + inst_node.prev = block_node.last_inst; + debug_assert!(inst_node.next.is_none()); + } + if block_node.first_inst.is_none() { + block_node.first_inst = inst.into(); + } else { + self.insts[block_node.last_inst.unwrap()].next = inst.into(); + } + block_node.last_inst = inst.into(); + } + self.assign_inst_seq(inst); + } + + /// Fetch a block's first instruction. + pub fn first_inst(&self, block: Block) -> Option { + self.blocks[block].first_inst.into() + } + + /// Fetch a block's last instruction. + pub fn last_inst(&self, block: Block) -> Option { + self.blocks[block].last_inst.into() + } + + /// Fetch the instruction following `inst`. + pub fn next_inst(&self, inst: Inst) -> Option { + self.insts[inst].next.expand() + } + + /// Fetch the instruction preceding `inst`. + pub fn prev_inst(&self, inst: Inst) -> Option { + self.insts[inst].prev.expand() + } + + /// Insert `inst` before the instruction `before` in the same block. + pub fn insert_inst(&mut self, inst: Inst, before: Inst) { + debug_assert_eq!(self.inst_block(inst), None); + let block = self + .inst_block(before) + .expect("Instruction before insertion point not in the layout"); + let after = self.insts[before].prev; + { + let inst_node = &mut self.insts[inst]; + inst_node.block = block.into(); + inst_node.next = before.into(); + inst_node.prev = after; + } + self.insts[before].prev = inst.into(); + match after.expand() { + None => self.blocks[block].first_inst = inst.into(), + Some(a) => self.insts[a].next = inst.into(), + } + self.assign_inst_seq(inst); + } + + /// Remove `inst` from the layout. + pub fn remove_inst(&mut self, inst: Inst) { + let block = self.inst_block(inst).expect("Instruction already removed."); + // Clear the `inst` node and extract links. + let prev; + let next; + { + let n = &mut self.insts[inst]; + prev = n.prev; + next = n.next; + n.block = None.into(); + n.prev = None.into(); + n.next = None.into(); + } + // Fix up links to `inst`. + match prev.expand() { + None => self.blocks[block].first_inst = next, + Some(p) => self.insts[p].next = next, + } + match next.expand() { + None => self.blocks[block].last_inst = prev, + Some(n) => self.insts[n].prev = prev, + } + } + + /// Iterate over the instructions in `block` in layout order. + pub fn block_insts(&self, block: Block) -> Insts { + Insts { + layout: self, + head: self.blocks[block].first_inst.into(), + tail: self.blocks[block].last_inst.into(), + } + } + + /// Split the block containing `before` in two. + /// + /// Insert `new_block` after the old block and move `before` and the following instructions to + /// `new_block`: + /// + /// ```text + /// old_block: + /// i1 + /// i2 + /// i3 << before + /// i4 + /// ``` + /// becomes: + /// + /// ```text + /// old_block: + /// i1 + /// i2 + /// new_block: + /// i3 << before + /// i4 + /// ``` + pub fn split_block(&mut self, new_block: Block, before: Inst) { + let old_block = self + .inst_block(before) + .expect("The `before` instruction must be in the layout"); + debug_assert!(!self.is_block_inserted(new_block)); + + // Insert new_block after old_block. 
+ let next_block = self.blocks[old_block].next; + let last_inst = self.blocks[old_block].last_inst; + { + let node = &mut self.blocks[new_block]; + node.prev = old_block.into(); + node.next = next_block; + node.first_inst = before.into(); + node.last_inst = last_inst; + } + self.blocks[old_block].next = new_block.into(); + + // Fix backwards link. + if Some(old_block) == self.last_block { + self.last_block = Some(new_block); + } else { + self.blocks[next_block.unwrap()].prev = new_block.into(); + } + + // Disconnect the instruction links. + let prev_inst = self.insts[before].prev; + self.insts[before].prev = None.into(); + self.blocks[old_block].last_inst = prev_inst; + match prev_inst.expand() { + None => self.blocks[old_block].first_inst = None.into(), + Some(pi) => self.insts[pi].next = None.into(), + } + + // Fix the instruction -> block pointers. + let mut opt_i = Some(before); + while let Some(i) = opt_i { + debug_assert_eq!(self.insts[i].block.expand(), Some(old_block)); + self.insts[i].block = new_block.into(); + opt_i = self.insts[i].next.into(); + } + } +} + +#[derive(Clone, Debug, Default)] +struct InstNode { + /// The Block containing this instruction, or `None` if the instruction is not yet inserted. + block: PackedOption, + prev: PackedOption, + next: PackedOption, + seq: SequenceNumber, +} + +impl PartialEq for InstNode { + fn eq(&self, other: &Self) -> bool { + // Ignore the sequence number as it is an optimization used by pp_cmp and may be different + // even for equivalent layouts. + self.block == other.block && self.prev == other.prev && self.next == other.next + } +} + +impl core::hash::Hash for InstNode { + fn hash(&self, state: &mut H) { + // Ignore the sequence number as it is an optimization used by pp_cmp and may be different + // even for equivalent layouts. + self.block.hash(state); + self.prev.hash(state); + self.next.hash(state); + } +} + +/// Iterate over instructions in a block in layout order. See `Layout::block_insts()`. +pub struct Insts<'f> { + layout: &'f Layout, + head: Option, + tail: Option, +} + +impl<'f> Iterator for Insts<'f> { + type Item = Inst; + + fn next(&mut self) -> Option { + let rval = self.head; + if let Some(inst) = rval { + if self.head == self.tail { + self.head = None; + self.tail = None; + } else { + self.head = self.layout.insts[inst].next.into(); + } + } + rval + } +} + +impl<'f> DoubleEndedIterator for Insts<'f> { + fn next_back(&mut self) -> Option { + let rval = self.tail; + if let Some(inst) = rval { + if self.head == self.tail { + self.head = None; + self.tail = None; + } else { + self.tail = self.layout.insts[inst].prev.into(); + } + } + rval + } +} + +/// A custom serialize and deserialize implementation for [`Layout`]. +/// +/// This doesn't use a derived implementation as [`Layout`] is a manual implementation of a linked +/// list. Storing it directly as a regular list saves a lot of space. +/// +/// The following format is used. 
(notated in EBNF form) +/// +/// ```plain +/// data = block_data * ; +/// block_data = "block_id" , "cold" , "inst_count" , ( "inst_id" * ) ; +/// ``` +#[cfg(feature = "enable-serde")] +mod serde { + use ::serde::de::{Deserializer, Error, SeqAccess, Visitor}; + use ::serde::ser::{SerializeSeq, Serializer}; + use ::serde::{Deserialize, Serialize}; + use core::fmt; + use core::marker::PhantomData; + + use super::*; + + impl Serialize for Layout { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + let size = self.blocks().count() * 3 + + self + .blocks() + .map(|block| self.block_insts(block).count()) + .sum::(); + let mut seq = serializer.serialize_seq(Some(size))?; + for block in self.blocks() { + seq.serialize_element(&block)?; + seq.serialize_element(&self.blocks[block].cold)?; + seq.serialize_element(&u32::try_from(self.block_insts(block).count()).unwrap())?; + for inst in self.block_insts(block) { + seq.serialize_element(&inst)?; + } + } + seq.end() + } + } + + impl<'de> Deserialize<'de> for Layout { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + deserializer.deserialize_seq(LayoutVisitor { + marker: PhantomData, + }) + } + } + + struct LayoutVisitor { + marker: PhantomData Layout>, + } + + impl<'de> Visitor<'de> for LayoutVisitor { + type Value = Layout; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + write!(formatter, "a `cranelift_codegen::ir::Layout`") + } + + fn visit_seq(self, mut access: M) -> Result + where + M: SeqAccess<'de>, + { + let mut layout = Layout::new(); + + while let Some(block) = access.next_element::()? { + layout.append_block(block); + + let cold = access + .next_element::()? + .ok_or_else(|| Error::missing_field("cold"))?; + layout.blocks[block].cold = cold; + + let count = access + .next_element::()? + .ok_or_else(|| Error::missing_field("count"))?; + + for _ in 0..count { + let inst = access + .next_element::()? + .ok_or_else(|| Error::missing_field("inst"))?; + layout.append_inst(inst, block); + } + } + + Ok(layout) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::cursor::{Cursor, CursorPosition}; + use crate::entity::EntityRef; + use crate::ir::{Block, Inst, SourceLoc}; + use alloc::vec::Vec; + use core::cmp::Ordering; + + #[test] + fn test_midpoint() { + assert_eq!(midpoint(0, 1), None); + assert_eq!(midpoint(0, 2), Some(1)); + assert_eq!(midpoint(0, 3), Some(1)); + assert_eq!(midpoint(0, 4), Some(2)); + assert_eq!(midpoint(1, 4), Some(2)); + assert_eq!(midpoint(2, 4), Some(3)); + assert_eq!(midpoint(3, 4), None); + assert_eq!(midpoint(3, 4), None); + } + + struct LayoutCursor<'f> { + /// Borrowed function layout. Public so it can be re-borrowed from this cursor. + pub layout: &'f mut Layout, + pos: CursorPosition, + } + + impl<'f> Cursor for LayoutCursor<'f> { + fn position(&self) -> CursorPosition { + self.pos + } + + fn set_position(&mut self, pos: CursorPosition) { + self.pos = pos; + } + + fn srcloc(&self) -> SourceLoc { + unimplemented!() + } + + fn set_srcloc(&mut self, _srcloc: SourceLoc) { + unimplemented!() + } + + fn layout(&self) -> &Layout { + self.layout + } + + fn layout_mut(&mut self) -> &mut Layout { + self.layout + } + } + + impl<'f> LayoutCursor<'f> { + /// Create a new `LayoutCursor` for `layout`. + /// The cursor holds a mutable reference to `layout` for its entire lifetime. 
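The flat sequence format described above (block id, cold flag, instruction count, then the instruction ids) can be sketched without serde. The standalone encoder below is hypothetical and only mirrors the ordering, not the real `Serialize` implementation:

```rust
// Hypothetical sketch of the flat encoding: one run of
// (block, cold, inst_count, insts...) per block, concatenated in layout order.
fn encode(blocks: &[(u32, bool, Vec<u32>)]) -> Vec<u32> {
    let mut out = Vec::new();
    for (block, cold, insts) in blocks {
        out.push(*block);
        out.push(*cold as u32);
        out.push(insts.len() as u32);
        out.extend_from_slice(insts);
    }
    out
}

fn main() {
    let blocks = vec![(0, false, vec![7, 8]), (1, true, vec![9])];
    // block0: not cold, 2 insts; block1: cold, 1 inst.
    assert_eq!(encode(&blocks), vec![0, 0, 2, 7, 8, 1, 1, 1, 9]);
}
```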
+ pub fn new(layout: &'f mut Layout) -> Self { + Self { + layout, + pos: CursorPosition::Nowhere, + } + } + } + + fn verify(layout: &mut Layout, blocks: &[(Block, &[Inst])]) { + // Check that blocks are inserted and instructions belong the right places. + // Check forward linkage with iterators. + // Check that layout sequence numbers are strictly monotonic. + { + let mut block_iter = layout.blocks(); + for &(block, insts) in blocks { + assert!(layout.is_block_inserted(block)); + assert_eq!(block_iter.next(), Some(block)); + + let mut seq = 0; + let mut inst_iter = layout.block_insts(block); + for &inst in insts { + assert_eq!(layout.inst_block(inst), Some(block)); + assert_eq!(inst_iter.next(), Some(inst)); + assert!(layout.insts[inst].seq > seq); + seq = layout.insts[inst].seq; + } + assert_eq!(inst_iter.next(), None); + } + assert_eq!(block_iter.next(), None); + } + + // Check backwards linkage with a cursor. + let mut cur = LayoutCursor::new(layout); + for &(block, insts) in blocks.into_iter().rev() { + assert_eq!(cur.prev_block(), Some(block)); + for &inst in insts.into_iter().rev() { + assert_eq!(cur.prev_inst(), Some(inst)); + } + assert_eq!(cur.prev_inst(), None); + } + assert_eq!(cur.prev_block(), None); + } + + #[test] + fn append_block() { + let mut layout = Layout::new(); + let e0 = Block::new(0); + let e1 = Block::new(1); + let e2 = Block::new(2); + + { + let imm = &layout; + assert!(!imm.is_block_inserted(e0)); + assert!(!imm.is_block_inserted(e1)); + } + verify(&mut layout, &[]); + + layout.append_block(e1); + assert!(!layout.is_block_inserted(e0)); + assert!(layout.is_block_inserted(e1)); + assert!(!layout.is_block_inserted(e2)); + let v: Vec = layout.blocks().collect(); + assert_eq!(v, [e1]); + + layout.append_block(e2); + assert!(!layout.is_block_inserted(e0)); + assert!(layout.is_block_inserted(e1)); + assert!(layout.is_block_inserted(e2)); + let v: Vec = layout.blocks().collect(); + assert_eq!(v, [e1, e2]); + + layout.append_block(e0); + assert!(layout.is_block_inserted(e0)); + assert!(layout.is_block_inserted(e1)); + assert!(layout.is_block_inserted(e2)); + let v: Vec = layout.blocks().collect(); + assert_eq!(v, [e1, e2, e0]); + + { + let imm = &layout; + let mut v = Vec::new(); + for e in imm { + v.push(e); + } + assert_eq!(v, [e1, e2, e0]); + } + + // Test cursor positioning. + let mut cur = LayoutCursor::new(&mut layout); + assert_eq!(cur.position(), CursorPosition::Nowhere); + assert_eq!(cur.next_inst(), None); + assert_eq!(cur.position(), CursorPosition::Nowhere); + assert_eq!(cur.prev_inst(), None); + assert_eq!(cur.position(), CursorPosition::Nowhere); + + assert_eq!(cur.next_block(), Some(e1)); + assert_eq!(cur.position(), CursorPosition::Before(e1)); + assert_eq!(cur.next_inst(), None); + assert_eq!(cur.position(), CursorPosition::After(e1)); + assert_eq!(cur.next_inst(), None); + assert_eq!(cur.position(), CursorPosition::After(e1)); + assert_eq!(cur.next_block(), Some(e2)); + assert_eq!(cur.prev_inst(), None); + assert_eq!(cur.position(), CursorPosition::Before(e2)); + assert_eq!(cur.next_block(), Some(e0)); + assert_eq!(cur.next_block(), None); + assert_eq!(cur.position(), CursorPosition::Nowhere); + + // Backwards through the blocks. 
+ assert_eq!(cur.prev_block(), Some(e0)); + assert_eq!(cur.position(), CursorPosition::After(e0)); + assert_eq!(cur.prev_block(), Some(e2)); + assert_eq!(cur.prev_block(), Some(e1)); + assert_eq!(cur.prev_block(), None); + assert_eq!(cur.position(), CursorPosition::Nowhere); + } + + #[test] + fn insert_block() { + let mut layout = Layout::new(); + let e0 = Block::new(0); + let e1 = Block::new(1); + let e2 = Block::new(2); + + { + let imm = &layout; + assert!(!imm.is_block_inserted(e0)); + assert!(!imm.is_block_inserted(e1)); + + let v: Vec = layout.blocks().collect(); + assert_eq!(v, []); + } + + layout.append_block(e1); + assert!(!layout.is_block_inserted(e0)); + assert!(layout.is_block_inserted(e1)); + assert!(!layout.is_block_inserted(e2)); + verify(&mut layout, &[(e1, &[])]); + + layout.insert_block(e2, e1); + assert!(!layout.is_block_inserted(e0)); + assert!(layout.is_block_inserted(e1)); + assert!(layout.is_block_inserted(e2)); + verify(&mut layout, &[(e2, &[]), (e1, &[])]); + + layout.insert_block(e0, e1); + assert!(layout.is_block_inserted(e0)); + assert!(layout.is_block_inserted(e1)); + assert!(layout.is_block_inserted(e2)); + verify(&mut layout, &[(e2, &[]), (e0, &[]), (e1, &[])]); + } + + #[test] + fn insert_block_after() { + let mut layout = Layout::new(); + let e0 = Block::new(0); + let e1 = Block::new(1); + let e2 = Block::new(2); + + layout.append_block(e1); + layout.insert_block_after(e2, e1); + verify(&mut layout, &[(e1, &[]), (e2, &[])]); + + layout.insert_block_after(e0, e1); + verify(&mut layout, &[(e1, &[]), (e0, &[]), (e2, &[])]); + } + + #[test] + fn append_inst() { + let mut layout = Layout::new(); + let e1 = Block::new(1); + + layout.append_block(e1); + let v: Vec = layout.block_insts(e1).collect(); + assert_eq!(v, []); + + let i0 = Inst::new(0); + let i1 = Inst::new(1); + let i2 = Inst::new(2); + + assert_eq!(layout.inst_block(i0), None); + assert_eq!(layout.inst_block(i1), None); + assert_eq!(layout.inst_block(i2), None); + + layout.append_inst(i1, e1); + assert_eq!(layout.inst_block(i0), None); + assert_eq!(layout.inst_block(i1), Some(e1)); + assert_eq!(layout.inst_block(i2), None); + let v: Vec = layout.block_insts(e1).collect(); + assert_eq!(v, [i1]); + + layout.append_inst(i2, e1); + assert_eq!(layout.inst_block(i0), None); + assert_eq!(layout.inst_block(i1), Some(e1)); + assert_eq!(layout.inst_block(i2), Some(e1)); + let v: Vec = layout.block_insts(e1).collect(); + assert_eq!(v, [i1, i2]); + + // Test double-ended instruction iterator. + let v: Vec = layout.block_insts(e1).rev().collect(); + assert_eq!(v, [i2, i1]); + + layout.append_inst(i0, e1); + verify(&mut layout, &[(e1, &[i1, i2, i0])]); + + // Test cursor positioning. 
+ let mut cur = LayoutCursor::new(&mut layout).at_top(e1); + assert_eq!(cur.position(), CursorPosition::Before(e1)); + assert_eq!(cur.prev_inst(), None); + assert_eq!(cur.position(), CursorPosition::Before(e1)); + assert_eq!(cur.next_inst(), Some(i1)); + assert_eq!(cur.position(), CursorPosition::At(i1)); + assert_eq!(cur.next_inst(), Some(i2)); + assert_eq!(cur.next_inst(), Some(i0)); + assert_eq!(cur.prev_inst(), Some(i2)); + assert_eq!(cur.position(), CursorPosition::At(i2)); + assert_eq!(cur.next_inst(), Some(i0)); + assert_eq!(cur.position(), CursorPosition::At(i0)); + assert_eq!(cur.next_inst(), None); + assert_eq!(cur.position(), CursorPosition::After(e1)); + assert_eq!(cur.next_inst(), None); + assert_eq!(cur.position(), CursorPosition::After(e1)); + assert_eq!(cur.prev_inst(), Some(i0)); + assert_eq!(cur.prev_inst(), Some(i2)); + assert_eq!(cur.prev_inst(), Some(i1)); + assert_eq!(cur.prev_inst(), None); + assert_eq!(cur.position(), CursorPosition::Before(e1)); + + // Test remove_inst. + cur.goto_inst(i2); + assert_eq!(cur.remove_inst(), i2); + verify(cur.layout, &[(e1, &[i1, i0])]); + assert_eq!(cur.layout.inst_block(i2), None); + assert_eq!(cur.remove_inst(), i0); + verify(cur.layout, &[(e1, &[i1])]); + assert_eq!(cur.layout.inst_block(i0), None); + assert_eq!(cur.position(), CursorPosition::After(e1)); + cur.layout.remove_inst(i1); + verify(cur.layout, &[(e1, &[])]); + assert_eq!(cur.layout.inst_block(i1), None); + } + + #[test] + fn insert_inst() { + let mut layout = Layout::new(); + let e1 = Block::new(1); + + layout.append_block(e1); + let v: Vec = layout.block_insts(e1).collect(); + assert_eq!(v, []); + + let i0 = Inst::new(0); + let i1 = Inst::new(1); + let i2 = Inst::new(2); + + assert_eq!(layout.inst_block(i0), None); + assert_eq!(layout.inst_block(i1), None); + assert_eq!(layout.inst_block(i2), None); + + layout.append_inst(i1, e1); + assert_eq!(layout.inst_block(i0), None); + assert_eq!(layout.inst_block(i1), Some(e1)); + assert_eq!(layout.inst_block(i2), None); + let v: Vec = layout.block_insts(e1).collect(); + assert_eq!(v, [i1]); + + layout.insert_inst(i2, i1); + assert_eq!(layout.inst_block(i0), None); + assert_eq!(layout.inst_block(i1), Some(e1)); + assert_eq!(layout.inst_block(i2), Some(e1)); + let v: Vec = layout.block_insts(e1).collect(); + assert_eq!(v, [i2, i1]); + + layout.insert_inst(i0, i1); + verify(&mut layout, &[(e1, &[i2, i0, i1])]); + } + + #[test] + fn multiple_blocks() { + let mut layout = Layout::new(); + + let e0 = Block::new(0); + let e1 = Block::new(1); + + assert_eq!(layout.entry_block(), None); + layout.append_block(e0); + assert_eq!(layout.entry_block(), Some(e0)); + layout.append_block(e1); + assert_eq!(layout.entry_block(), Some(e0)); + + let i0 = Inst::new(0); + let i1 = Inst::new(1); + let i2 = Inst::new(2); + let i3 = Inst::new(3); + + layout.append_inst(i0, e0); + layout.append_inst(i1, e0); + layout.append_inst(i2, e1); + layout.append_inst(i3, e1); + + let v0: Vec = layout.block_insts(e0).collect(); + let v1: Vec = layout.block_insts(e1).collect(); + assert_eq!(v0, [i0, i1]); + assert_eq!(v1, [i2, i3]); + } + + #[test] + fn split_block() { + let mut layout = Layout::new(); + + let e0 = Block::new(0); + let e1 = Block::new(1); + let e2 = Block::new(2); + + let i0 = Inst::new(0); + let i1 = Inst::new(1); + let i2 = Inst::new(2); + let i3 = Inst::new(3); + + layout.append_block(e0); + layout.append_inst(i0, e0); + assert_eq!(layout.inst_block(i0), Some(e0)); + layout.split_block(e1, i0); + assert_eq!(layout.inst_block(i0), Some(e1)); + + 
{ + let mut cur = LayoutCursor::new(&mut layout); + assert_eq!(cur.next_block(), Some(e0)); + assert_eq!(cur.next_inst(), None); + assert_eq!(cur.next_block(), Some(e1)); + assert_eq!(cur.next_inst(), Some(i0)); + assert_eq!(cur.next_inst(), None); + assert_eq!(cur.next_block(), None); + + // Check backwards links. + assert_eq!(cur.prev_block(), Some(e1)); + assert_eq!(cur.prev_inst(), Some(i0)); + assert_eq!(cur.prev_inst(), None); + assert_eq!(cur.prev_block(), Some(e0)); + assert_eq!(cur.prev_inst(), None); + assert_eq!(cur.prev_block(), None); + } + + layout.append_inst(i1, e0); + layout.append_inst(i2, e0); + layout.append_inst(i3, e0); + layout.split_block(e2, i2); + + assert_eq!(layout.inst_block(i0), Some(e1)); + assert_eq!(layout.inst_block(i1), Some(e0)); + assert_eq!(layout.inst_block(i2), Some(e2)); + assert_eq!(layout.inst_block(i3), Some(e2)); + + { + let mut cur = LayoutCursor::new(&mut layout); + assert_eq!(cur.next_block(), Some(e0)); + assert_eq!(cur.next_inst(), Some(i1)); + assert_eq!(cur.next_inst(), None); + assert_eq!(cur.next_block(), Some(e2)); + assert_eq!(cur.next_inst(), Some(i2)); + assert_eq!(cur.next_inst(), Some(i3)); + assert_eq!(cur.next_inst(), None); + assert_eq!(cur.next_block(), Some(e1)); + assert_eq!(cur.next_inst(), Some(i0)); + assert_eq!(cur.next_inst(), None); + assert_eq!(cur.next_block(), None); + + assert_eq!(cur.prev_block(), Some(e1)); + assert_eq!(cur.prev_inst(), Some(i0)); + assert_eq!(cur.prev_inst(), None); + assert_eq!(cur.prev_block(), Some(e2)); + assert_eq!(cur.prev_inst(), Some(i3)); + assert_eq!(cur.prev_inst(), Some(i2)); + assert_eq!(cur.prev_inst(), None); + assert_eq!(cur.prev_block(), Some(e0)); + assert_eq!(cur.prev_inst(), Some(i1)); + assert_eq!(cur.prev_inst(), None); + assert_eq!(cur.prev_block(), None); + } + + // Check `ProgramOrder`. + assert_eq!(layout.pp_cmp(e2, e2), Ordering::Equal); + assert_eq!(layout.pp_cmp(e2, i2), Ordering::Less); + assert_eq!(layout.pp_cmp(i3, i2), Ordering::Greater) + } +} diff --git a/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/libcall.rs b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/libcall.rs new file mode 100644 index 000000000..1e05adcc5 --- /dev/null +++ b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/libcall.rs @@ -0,0 +1,232 @@ +//! Naming well-known routines in the runtime library. + +use crate::{ + ir::{types, AbiParam, ExternalName, FuncRef, Function, Signature, Type}, + isa::CallConv, +}; +use core::fmt; +use core::str::FromStr; +#[cfg(feature = "enable-serde")] +use serde_derive::{Deserialize, Serialize}; + +/// The name of a runtime library routine. +/// +/// Runtime library calls are generated for Cranelift IR instructions that don't have an equivalent +/// ISA instruction or an easy macro expansion. A `LibCall` is used as a well-known name to refer to +/// the runtime library routine. This way, Cranelift doesn't have to know about the naming +/// convention in the embedding VM's runtime library. +/// +/// This list is likely to grow over time. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub enum LibCall { + /// probe for stack overflow. These are emitted for functions which need + /// when the `enable_probestack` setting is true. 
+ Probestack, + /// ceil.f32 + CeilF32, + /// ceil.f64 + CeilF64, + /// floor.f32 + FloorF32, + /// floor.f64 + FloorF64, + /// trunc.f32 + TruncF32, + /// frunc.f64 + TruncF64, + /// nearest.f32 + NearestF32, + /// nearest.f64 + NearestF64, + /// fma.f32 + FmaF32, + /// fma.f64 + FmaF64, + /// libc.memcpy + Memcpy, + /// libc.memset + Memset, + /// libc.memmove + Memmove, + /// libc.memcmp + Memcmp, + + /// Elf __tls_get_addr + ElfTlsGetAddr, + /// Elf __tls_get_offset + ElfTlsGetOffset, + + /// The `pshufb` on x86 when SSSE3 isn't available. + X86Pshufb, + // When adding a new variant make sure to add it to `all_libcalls` too. +} + +impl fmt::Display for LibCall { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Debug::fmt(self, f) + } +} + +impl FromStr for LibCall { + type Err = (); + + fn from_str(s: &str) -> Result { + match s { + "Probestack" => Ok(Self::Probestack), + "CeilF32" => Ok(Self::CeilF32), + "CeilF64" => Ok(Self::CeilF64), + "FloorF32" => Ok(Self::FloorF32), + "FloorF64" => Ok(Self::FloorF64), + "TruncF32" => Ok(Self::TruncF32), + "TruncF64" => Ok(Self::TruncF64), + "NearestF32" => Ok(Self::NearestF32), + "NearestF64" => Ok(Self::NearestF64), + "FmaF32" => Ok(Self::FmaF32), + "FmaF64" => Ok(Self::FmaF64), + "Memcpy" => Ok(Self::Memcpy), + "Memset" => Ok(Self::Memset), + "Memmove" => Ok(Self::Memmove), + "Memcmp" => Ok(Self::Memcmp), + + "ElfTlsGetAddr" => Ok(Self::ElfTlsGetAddr), + "ElfTlsGetOffset" => Ok(Self::ElfTlsGetOffset), + + "X86Pshufb" => Ok(Self::X86Pshufb), + _ => Err(()), + } + } +} + +impl LibCall { + /// Get a list of all known `LibCall`'s. + pub fn all_libcalls() -> &'static [LibCall] { + use LibCall::*; + &[ + Probestack, + CeilF32, + CeilF64, + FloorF32, + FloorF64, + TruncF32, + TruncF64, + NearestF32, + NearestF64, + FmaF32, + FmaF64, + Memcpy, + Memset, + Memmove, + Memcmp, + ElfTlsGetAddr, + ElfTlsGetOffset, + X86Pshufb, + ] + } + + /// Get a [Signature] for the function targeted by this [LibCall]. 
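`LibCall` above pairs a `Display` impl that reuses the `Debug` name with a `FromStr` impl that accepts exactly those names, so `to_string` and `parse` round-trip (as the test module later checks). The same pattern, reduced to a standalone sketch with a hypothetical two-variant enum:

```rust
// Hypothetical sketch of the Display-via-Debug plus FromStr round-trip pattern.
use core::fmt;
use core::str::FromStr;

#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum TinyCall {
    Memcpy,
    Memset,
}

impl fmt::Display for TinyCall {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        fmt::Debug::fmt(self, f)
    }
}

impl FromStr for TinyCall {
    type Err = ();
    fn from_str(s: &str) -> Result<Self, ()> {
        match s {
            "Memcpy" => Ok(Self::Memcpy),
            "Memset" => Ok(Self::Memset),
            _ => Err(()),
        }
    }
}

fn main() {
    for call in [TinyCall::Memcpy, TinyCall::Memset] {
        assert_eq!(call.to_string().parse(), Ok(call));
    }
}
```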
+ pub fn signature(&self, call_conv: CallConv, pointer_type: Type) -> Signature { + use types::*; + let mut sig = Signature::new(call_conv); + + match self { + LibCall::CeilF32 | LibCall::FloorF32 | LibCall::TruncF32 | LibCall::NearestF32 => { + sig.params.push(AbiParam::new(F32)); + sig.returns.push(AbiParam::new(F32)); + } + LibCall::TruncF64 | LibCall::FloorF64 | LibCall::CeilF64 | LibCall::NearestF64 => { + sig.params.push(AbiParam::new(F64)); + sig.returns.push(AbiParam::new(F64)); + } + LibCall::FmaF32 | LibCall::FmaF64 => { + let ty = if *self == LibCall::FmaF32 { F32 } else { F64 }; + + sig.params.push(AbiParam::new(ty)); + sig.params.push(AbiParam::new(ty)); + sig.params.push(AbiParam::new(ty)); + sig.returns.push(AbiParam::new(ty)); + } + LibCall::Memcpy | LibCall::Memmove => { + // void* memcpy(void *dest, const void *src, size_t count); + // void* memmove(void* dest, const void* src, size_t count); + sig.params.push(AbiParam::new(pointer_type)); + sig.params.push(AbiParam::new(pointer_type)); + sig.params.push(AbiParam::new(pointer_type)); + sig.returns.push(AbiParam::new(pointer_type)); + } + LibCall::Memset => { + // void *memset(void *dest, int ch, size_t count); + sig.params.push(AbiParam::new(pointer_type)); + sig.params.push(AbiParam::new(I32)); + sig.params.push(AbiParam::new(pointer_type)); + sig.returns.push(AbiParam::new(pointer_type)); + } + LibCall::Memcmp => { + // void* memcpy(void *dest, const void *src, size_t count); + sig.params.push(AbiParam::new(pointer_type)); + sig.params.push(AbiParam::new(pointer_type)); + sig.params.push(AbiParam::new(pointer_type)); + sig.returns.push(AbiParam::new(I32)) + } + + LibCall::Probestack | LibCall::ElfTlsGetAddr | LibCall::ElfTlsGetOffset => { + unimplemented!() + } + LibCall::X86Pshufb => { + sig.params.push(AbiParam::new(I8X16)); + sig.params.push(AbiParam::new(I8X16)); + sig.returns.push(AbiParam::new(I8X16)); + } + } + + sig + } +} + +/// Get a function reference for the probestack function in `func`. +/// +/// If there is an existing reference, use it, otherwise make a new one. +pub fn get_probestack_funcref(func: &mut Function) -> Option { + find_funcref(LibCall::Probestack, func) +} + +/// Get the existing function reference for `libcall` in `func` if it exists. +fn find_funcref(libcall: LibCall, func: &Function) -> Option { + // We're assuming that all libcall function decls are at the end. + // If we get this wrong, worst case we'll have duplicate libcall decls which is harmless. + for (fref, func_data) in func.dfg.ext_funcs.iter().rev() { + match func_data.name { + ExternalName::LibCall(lc) => { + if lc == libcall { + return Some(fref); + } + } + _ => break, + } + } + None +} + +#[cfg(test)] +mod tests { + use super::*; + use alloc::string::ToString; + + #[test] + fn display() { + assert_eq!(LibCall::CeilF32.to_string(), "CeilF32"); + assert_eq!(LibCall::NearestF64.to_string(), "NearestF64"); + } + + #[test] + fn parsing() { + assert_eq!("FloorF32".parse(), Ok(LibCall::FloorF32)); + } + + #[test] + fn all_libcalls_to_from_string() { + for &libcall in LibCall::all_libcalls() { + assert_eq!(libcall.to_string().parse(), Ok(libcall)); + } + } +} diff --git a/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/memflags.rs b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/memflags.rs new file mode 100644 index 000000000..d9ac892c7 --- /dev/null +++ b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/memflags.rs @@ -0,0 +1,487 @@ +//! Memory operation flags. 
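A usage sketch for the `signature` helper above. It assumes `cranelift-codegen` 0.119 is available as a dependency and that `LibCall`, `types`, and `CallConv` are importable from `cranelift_codegen::ir` and `cranelift_codegen::isa`, as in the crate's usual re-exports (an assumption, not verified here):

```rust
// Sketch: build the runtime-library signature for a memcpy-style call on a
// 64-bit target, where the pointer type is i64.
use cranelift_codegen::ir::{types, LibCall};
use cranelift_codegen::isa::CallConv;

fn main() {
    // memcpy-style routines take three pointer-sized arguments and return one.
    let sig = LibCall::Memcpy.signature(CallConv::SystemV, types::I64);
    assert_eq!(sig.params.len(), 3);
    assert_eq!(sig.returns.len(), 1);
}
```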
+ +use super::TrapCode; +use core::fmt; +use core::num::NonZeroU8; +use core::str::FromStr; + +#[cfg(feature = "enable-serde")] +use serde_derive::{Deserialize, Serialize}; + +/// Endianness of a memory access. +#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)] +pub enum Endianness { + /// Little-endian + Little, + /// Big-endian + Big, +} + +/// Which disjoint region of aliasing memory is accessed in this memory +/// operation. +#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)] +#[repr(u8)] +#[allow(missing_docs)] +#[rustfmt::skip] +pub enum AliasRegion { + // None = 0b00; + Heap = 0b01, + Table = 0b10, + Vmctx = 0b11, +} + +impl AliasRegion { + const fn from_bits(bits: u8) -> Option { + match bits { + 0b00 => None, + 0b01 => Some(Self::Heap), + 0b10 => Some(Self::Table), + 0b11 => Some(Self::Vmctx), + _ => panic!("invalid alias region bits"), + } + } + + const fn to_bits(region: Option) -> u8 { + match region { + None => 0b00, + Some(r) => r as u8, + } + } +} + +/// Flags for memory operations like load/store. +/// +/// Each of these flags introduce a limited form of undefined behavior. The flags each enable +/// certain optimizations that need to make additional assumptions. Generally, the semantics of a +/// program does not change when a flag is removed, but adding a flag will. +/// +/// In addition, the flags determine the endianness of the memory access. By default, +/// any memory access uses the native endianness determined by the target ISA. This can +/// be overridden for individual accesses by explicitly specifying little- or big-endian +/// semantics via the flags. +#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub struct MemFlags { + // Initialized to all zeros to have all flags have their default value. + // This is interpreted through various methods below. Currently the bits of + // this are defined as: + // + // * 0 - aligned flag + // * 1 - readonly flag + // * 2 - little endian flag + // * 3 - big endian flag + // * 4 - checked flag + // * 5/6 - alias region + // * 7/8/9/10/11/12/13/14 - trap code + // * 15 - can_move flag + // + // Current properties upheld are: + // + // * only one of little/big endian is set + // * only one alias region can be set - once set it cannot be changed + bits: u16, +} + +/// Guaranteed to use "natural alignment" for the given type. This +/// may enable better instruction selection. +const BIT_ALIGNED: u16 = 1 << 0; + +/// A load that reads data in memory that does not change for the +/// duration of the function's execution. This may enable +/// additional optimizations to be performed. +const BIT_READONLY: u16 = 1 << 1; + +/// Load multi-byte values from memory in a little-endian format. +const BIT_LITTLE_ENDIAN: u16 = 1 << 2; + +/// Load multi-byte values from memory in a big-endian format. +const BIT_BIG_ENDIAN: u16 = 1 << 3; + +/// Check this load or store for safety when using the +/// proof-carrying-code framework. The address must have a +/// `PointsTo` fact attached with a sufficiently large valid range +/// for the accessed size. +const BIT_CHECKED: u16 = 1 << 4; + +/// Used for alias analysis, indicates which disjoint part of the abstract state +/// is being accessed. +const MASK_ALIAS_REGION: u16 = 0b11 << ALIAS_REGION_OFFSET; +const ALIAS_REGION_OFFSET: u16 = 5; + +/// Trap code, if any, for this memory operation. 
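The bit layout documented above packs several one-bit flags and small multi-bit fields into a single `u16` with masks and shifts. A standalone sketch of that packing, using a hypothetical `TinyFlags` that reuses only the readonly bit and the two-bit alias-region field for illustration:

```rust
// Hypothetical sketch of mask-and-shift flag packing, in the spirit of the
// `MemFlags` bit layout above.
const BIT_READONLY: u16 = 1 << 1;
const ALIAS_REGION_OFFSET: u16 = 5;
const MASK_ALIAS_REGION: u16 = 0b11 << ALIAS_REGION_OFFSET;

#[derive(Clone, Copy, Default)]
struct TinyFlags {
    bits: u16,
}

impl TinyFlags {
    fn with_readonly(mut self) -> Self {
        self.bits |= BIT_READONLY;
        self
    }
    fn with_alias_region(mut self, region: u16) -> Self {
        // Clear the two-bit field, then store the new value at its offset.
        self.bits = (self.bits & !MASK_ALIAS_REGION) | (region << ALIAS_REGION_OFFSET);
        self
    }
    fn alias_region(self) -> u16 {
        (self.bits & MASK_ALIAS_REGION) >> ALIAS_REGION_OFFSET
    }
}

fn main() {
    let flags = TinyFlags::default().with_readonly().with_alias_region(0b01);
    assert_eq!(flags.alias_region(), 0b01);
    assert!((flags.bits & BIT_READONLY) != 0);
}
```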
+const MASK_TRAP_CODE: u16 = 0b1111_1111 << TRAP_CODE_OFFSET; +const TRAP_CODE_OFFSET: u16 = 7; + +/// Whether this memory operation may be freely moved by the optimizer so long +/// as its data dependencies are satisfied. That is, by setting this flag, the +/// producer is guaranteeing that this memory operation's safety is not guarded +/// by outside-the-data-flow-graph properties, like implicit bounds-checking +/// control dependencies. +const BIT_CAN_MOVE: u16 = 1 << 15; + +impl MemFlags { + /// Create a new empty set of flags. + pub const fn new() -> Self { + Self { bits: 0 }.with_trap_code(Some(TrapCode::HEAP_OUT_OF_BOUNDS)) + } + + /// Create a set of flags representing an access from a "trusted" address, meaning it's + /// known to be aligned and non-trapping. + pub const fn trusted() -> Self { + Self::new().with_notrap().with_aligned() + } + + /// Read a flag bit. + const fn read_bit(self, bit: u16) -> bool { + self.bits & bit != 0 + } + + /// Return a new `MemFlags` with this flag bit set. + const fn with_bit(mut self, bit: u16) -> Self { + self.bits |= bit; + self + } + + /// Reads the alias region that this memory operation works with. + pub const fn alias_region(self) -> Option { + AliasRegion::from_bits(((self.bits & MASK_ALIAS_REGION) >> ALIAS_REGION_OFFSET) as u8) + } + + /// Sets the alias region that this works on to the specified `region`. + pub const fn with_alias_region(mut self, region: Option) -> Self { + let bits = AliasRegion::to_bits(region); + self.bits &= !MASK_ALIAS_REGION; + self.bits |= (bits as u16) << ALIAS_REGION_OFFSET; + self + } + + /// Sets the alias region that this works on to the specified `region`. + pub fn set_alias_region(&mut self, region: Option) { + *self = self.with_alias_region(region); + } + + /// Set a flag bit by name. + /// + /// Returns true if the flag was found and set, false for an unknown flag + /// name. + /// + /// # Errors + /// + /// Returns an error message if the `name` is known but couldn't be applied + /// due to it being a semantic error. + pub fn set_by_name(&mut self, name: &str) -> Result { + *self = match name { + "notrap" => self.with_trap_code(None), + "aligned" => self.with_aligned(), + "readonly" => self.with_readonly(), + "little" => { + if self.read_bit(BIT_BIG_ENDIAN) { + return Err("cannot set both big and little endian bits"); + } + self.with_endianness(Endianness::Little) + } + "big" => { + if self.read_bit(BIT_LITTLE_ENDIAN) { + return Err("cannot set both big and little endian bits"); + } + self.with_endianness(Endianness::Big) + } + "heap" => { + if self.alias_region().is_some() { + return Err("cannot set more than one alias region"); + } + self.with_alias_region(Some(AliasRegion::Heap)) + } + "table" => { + if self.alias_region().is_some() { + return Err("cannot set more than one alias region"); + } + self.with_alias_region(Some(AliasRegion::Table)) + } + "vmctx" => { + if self.alias_region().is_some() { + return Err("cannot set more than one alias region"); + } + self.with_alias_region(Some(AliasRegion::Vmctx)) + } + "checked" => self.with_checked(), + "can_move" => self.with_can_move(), + + other => match TrapCode::from_str(other) { + Ok(code) => self.with_trap_code(Some(code)), + Err(()) => return Ok(false), + }, + }; + Ok(true) + } + + /// Return endianness of the memory access. This will return the endianness + /// explicitly specified by the flags if any, and will default to the native + /// endianness otherwise. 
The native endianness has to be provided by the + /// caller since it is not explicitly encoded in CLIF IR -- this allows a + /// front end to create IR without having to know the target endianness. + pub const fn endianness(self, native_endianness: Endianness) -> Endianness { + if self.read_bit(BIT_LITTLE_ENDIAN) { + Endianness::Little + } else if self.read_bit(BIT_BIG_ENDIAN) { + Endianness::Big + } else { + native_endianness + } + } + + /// Return endianness of the memory access, if explicitly specified. + /// + /// If the endianness is not explicitly specified, this will return `None`, + /// which means "native endianness". + pub const fn explicit_endianness(self) -> Option { + if self.read_bit(BIT_LITTLE_ENDIAN) { + Some(Endianness::Little) + } else if self.read_bit(BIT_BIG_ENDIAN) { + Some(Endianness::Big) + } else { + None + } + } + + /// Set endianness of the memory access. + pub fn set_endianness(&mut self, endianness: Endianness) { + *self = self.with_endianness(endianness); + } + + /// Set endianness of the memory access, returning new flags. + pub const fn with_endianness(self, endianness: Endianness) -> Self { + let res = match endianness { + Endianness::Little => self.with_bit(BIT_LITTLE_ENDIAN), + Endianness::Big => self.with_bit(BIT_BIG_ENDIAN), + }; + assert!(!(res.read_bit(BIT_LITTLE_ENDIAN) && res.read_bit(BIT_BIG_ENDIAN))); + res + } + + /// Test if this memory operation cannot trap. + /// + /// By default `MemFlags` will assume that any load/store can trap and is + /// associated with a `TrapCode::HeapOutOfBounds` code. If the trap code is + /// configured to `None` though then this method will return `true` and + /// indicates that the memory operation will not trap. + /// + /// If this returns `true` then the memory is *accessible*, which means + /// that accesses will not trap. This makes it possible to delete an unused + /// load or a dead store instruction. + /// + /// This flag does *not* mean that the associated instruction can be + /// code-motioned to arbitrary places in the function so long as its data + /// dependencies are met. This only means that, given its current location + /// in the function, it will never trap. See the `can_move` method for more + /// details. + pub const fn notrap(self) -> bool { + self.trap_code().is_none() + } + + /// Sets the trap code for this `MemFlags` to `None`. + pub fn set_notrap(&mut self) { + *self = self.with_notrap(); + } + + /// Sets the trap code for this `MemFlags` to `None`, returning the new + /// flags. + pub const fn with_notrap(self) -> Self { + self.with_trap_code(None) + } + + /// Is this memory operation safe to move so long as its data dependencies + /// remain satisfied? + /// + /// If this is `true`, then it is okay to code motion this instruction to + /// arbitrary locations, in the function, including across blocks and + /// conditional branches, so long as data dependencies (and trap ordering, + /// if any) are upheld. + /// + /// If this is `false`, then this memory operation's safety potentially + /// relies upon invariants that are not reflected in its data dependencies, + /// and therefore it is not safe to code motion this operation. For example, + /// this operation could be in a block that is dominated by a control-flow + /// bounds check, which is not reflected in its operands, and it would be + /// unsafe to code motion it above the bounds check, even if its data + /// dependencies would still be satisfied. 
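The endianness defaulting rules above are easy to invert by accident; here is a minimal hedged sketch of how `MemFlags` resolves them (hypothetical usage, assuming a `cranelift-codegen` dependency):

```rust
use cranelift_codegen::ir::{Endianness, MemFlags};

fn main() {
    // No endianness bit is set by default, so the access follows whatever
    // native endianness the caller supplies.
    let flags = MemFlags::new();
    assert_eq!(flags.explicit_endianness(), None);
    assert_eq!(flags.endianness(Endianness::Big), Endianness::Big);

    // Once `little` is requested explicitly, the native endianness no longer
    // matters.
    let little = flags.with_endianness(Endianness::Little);
    assert_eq!(little.explicit_endianness(), Some(Endianness::Little));
    assert_eq!(little.endianness(Endianness::Big), Endianness::Little);
}
```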
+ pub const fn can_move(self) -> bool { + self.read_bit(BIT_CAN_MOVE) + } + + /// Set the `can_move` flag. + pub const fn set_can_move(&mut self) { + *self = self.with_can_move(); + } + + /// Set the `can_move` flag, returning new flags. + pub const fn with_can_move(self) -> Self { + self.with_bit(BIT_CAN_MOVE) + } + + /// Test if the `aligned` flag is set. + /// + /// By default, Cranelift memory instructions work with any unaligned effective address. If the + /// `aligned` flag is set, the instruction is permitted to trap or return a wrong result if the + /// effective address is misaligned. + pub const fn aligned(self) -> bool { + self.read_bit(BIT_ALIGNED) + } + + /// Set the `aligned` flag. + pub fn set_aligned(&mut self) { + *self = self.with_aligned(); + } + + /// Set the `aligned` flag, returning new flags. + pub const fn with_aligned(self) -> Self { + self.with_bit(BIT_ALIGNED) + } + + /// Test if the `readonly` flag is set. + /// + /// Loads with this flag have no memory dependencies. + /// This results in undefined behavior if the dereferenced memory is mutated at any time + /// between when the function is called and when it is exited. + pub const fn readonly(self) -> bool { + self.read_bit(BIT_READONLY) + } + + /// Set the `readonly` flag. + pub fn set_readonly(&mut self) { + *self = self.with_readonly(); + } + + /// Set the `readonly` flag, returning new flags. + pub const fn with_readonly(self) -> Self { + self.with_bit(BIT_READONLY) + } + + /// Test if the `checked` bit is set. + /// + /// Loads and stores with this flag are verified to access + /// pointers only with a validated `PointsTo` fact attached, and + /// with that fact validated, when using the proof-carrying-code + /// framework. If initial facts on program inputs are correct + /// (i.e., correctly denote the shape and types of data structures + /// in memory), and if PCC validates the compiled output, then all + /// `checked`-marked memory accesses are guaranteed (up to the + /// checker's correctness) to access valid memory. This can be + /// used to ensure memory safety and sandboxing. + pub const fn checked(self) -> bool { + self.read_bit(BIT_CHECKED) + } + + /// Set the `checked` bit. + pub fn set_checked(&mut self) { + *self = self.with_checked(); + } + + /// Set the `checked` bit, returning new flags. + pub const fn with_checked(self) -> Self { + self.with_bit(BIT_CHECKED) + } + + /// Get the trap code to report if this memory access traps. + /// + /// A `None` trap code indicates that this memory access does not trap. + pub const fn trap_code(self) -> Option { + let byte = ((self.bits & MASK_TRAP_CODE) >> TRAP_CODE_OFFSET) as u8; + match NonZeroU8::new(byte) { + Some(code) => Some(TrapCode::from_raw(code)), + None => None, + } + } + + /// Configures these flags with the specified trap code `code`. + /// + /// A trap code indicates that this memory operation cannot be optimized + /// away and it must "stay where it is" in the programs. Traps are + /// considered side effects, for example, and have meaning through the trap + /// code that is communicated and which instruction trapped. 
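To make the trap-code defaults concrete, a short sketch of `MemFlags::new()` versus `MemFlags::trusted()` (hypothetical usage; it assumes `TrapCode::HEAP_OUT_OF_BOUNDS` is the public constant referenced in the code above):

```rust
use cranelift_codegen::ir::{MemFlags, TrapCode};

fn main() {
    // A fresh MemFlags carries the default heap-out-of-bounds trap code, so
    // the access is assumed to possibly trap.
    let flags = MemFlags::new();
    assert_eq!(flags.trap_code(), Some(TrapCode::HEAP_OUT_OF_BOUNDS));
    assert!(!flags.notrap());

    // `trusted()` clears the trap code and marks the access aligned: the
    // producer is vouching that the address is valid and well-aligned.
    let trusted = MemFlags::trusted();
    assert_eq!(trusted.trap_code(), None);
    assert!(trusted.notrap() && trusted.aligned());
}
```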
+ pub const fn with_trap_code(mut self, code: Option) -> Self { + let bits = match code { + Some(code) => code.as_raw().get() as u16, + None => 0, + }; + self.bits &= !MASK_TRAP_CODE; + self.bits |= bits << TRAP_CODE_OFFSET; + self + } +} + +impl fmt::Display for MemFlags { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self.trap_code() { + None => write!(f, " notrap")?, + // This is the default trap code, so don't print anything extra + // for this. + Some(TrapCode::HEAP_OUT_OF_BOUNDS) => {} + Some(t) => write!(f, " {t}")?, + } + if self.aligned() { + write!(f, " aligned")?; + } + if self.readonly() { + write!(f, " readonly")?; + } + if self.can_move() { + write!(f, " can_move")?; + } + if self.read_bit(BIT_BIG_ENDIAN) { + write!(f, " big")?; + } + if self.read_bit(BIT_LITTLE_ENDIAN) { + write!(f, " little")?; + } + if self.checked() { + write!(f, " checked")?; + } + match self.alias_region() { + None => {} + Some(AliasRegion::Heap) => write!(f, " heap")?, + Some(AliasRegion::Table) => write!(f, " table")?, + Some(AliasRegion::Vmctx) => write!(f, " vmctx")?, + } + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn roundtrip_traps() { + for trap in TrapCode::non_user_traps().iter().copied() { + let flags = MemFlags::new().with_trap_code(Some(trap)); + assert_eq!(flags.trap_code(), Some(trap)); + } + let flags = MemFlags::new().with_trap_code(None); + assert_eq!(flags.trap_code(), None); + } + + #[test] + fn cannot_set_big_and_little() { + let mut big = MemFlags::new().with_endianness(Endianness::Big); + assert!(big.set_by_name("little").is_err()); + + let mut little = MemFlags::new().with_endianness(Endianness::Little); + assert!(little.set_by_name("big").is_err()); + } + + #[test] + fn only_one_region() { + let mut big = MemFlags::new().with_alias_region(Some(AliasRegion::Heap)); + assert!(big.set_by_name("table").is_err()); + assert!(big.set_by_name("vmctx").is_err()); + + let mut big = MemFlags::new().with_alias_region(Some(AliasRegion::Table)); + assert!(big.set_by_name("heap").is_err()); + assert!(big.set_by_name("vmctx").is_err()); + + let mut big = MemFlags::new().with_alias_region(Some(AliasRegion::Vmctx)); + assert!(big.set_by_name("heap").is_err()); + assert!(big.set_by_name("table").is_err()); + } +} diff --git a/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/memtype.rs b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/memtype.rs new file mode 100644 index 000000000..4df26f164 --- /dev/null +++ b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/memtype.rs @@ -0,0 +1,190 @@ +//! Definitions for "memory types" in CLIF. +//! +//! A memory type is a struct-like definition -- fields with offsets, +//! each field having a type and possibly an attached fact -- that we +//! can use in proof-carrying code to validate accesses to structs and +//! propagate facts onto the loaded values as well. +//! +//! Memory types are meant to be rich enough to describe the *layout* +//! of values in memory, but do not necessarily need to embody +//! higher-level features such as subtyping directly. Rather, they +//! should encode an implementation of a type or object system. +//! +//! Note also that it is a non-goal for now for this type system to be +//! "complete" or fully orthogonal: we have some restrictions now +//! (e.g., struct fields are only primitives) because this is all we +//! need for existing PCC applications, and it keeps the +//! implementation simpler. +//! +//! 
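Before moving on through `memtype.rs`, a hedged round-trip sketch of the `set_by_name` parsing and the `Display` printing defined above for `MemFlags` (the expected output string is derived from the code as vendored here):

```rust
use cranelift_codegen::ir::MemFlags;

fn main() -> Result<(), &'static str> {
    // Build flags the way the CLIF text parser would, one name at a time.
    let mut flags = MemFlags::new();
    assert!(flags.set_by_name("readonly")?);
    assert!(flags.set_by_name("little")?);
    assert!(flags.set_by_name("heap")?);

    // Unknown names are reported as `Ok(false)` rather than as a hard error.
    assert!(!flags.set_by_name("no_such_flag")?);

    // The Display impl prints each non-default property back out.
    assert_eq!(flags.to_string(), " readonly little heap");

    // Conflicting requests, such as a second alias region, are rejected.
    assert!(flags.set_by_name("table").is_err());
    Ok(())
}
```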
There are a few basic kinds of types: +//! +//! - A struct is an aggregate of fields and an overall size. Each +//! field has a *primitive Cranelift type*. This is for simplicity's +//! sake: we do not allow nested memory types because to do so +//! invites cycles, requires recursive computation of sizes, creates +//! complicated questions when field types are dynamically-sized, +//! and in general is more complexity than we need. +//! +//! The expectation (validated by PCC) is that when a checked load +//! or store accesses memory typed by a memory type, accesses will +//! only be to fields at offsets named in the type, and will be via +//! the given Cranelift type -- i.e., no type-punning occurs in +//! memory. +//! +//! The overall size of the struct may be larger than that implied +//! by the fields because (i) we may not want or need to name all +//! the actually-existing fields in the memory type, and (ii) there +//! may be alignment padding that we also don't want or need to +//! represent explicitly. +//! +//! - A static memory is an untyped blob of storage with a static +//! size. This is memory that can be accessed with any type of load +//! or store at any valid offset. +//! +//! Note that this is *distinct* from an "array of u8" kind of +//! representation of memory, if/when we can represent such a thing, +//! because the expectation with memory types' fields (including +//! array elements) is that they are strongly typed, only accessed +//! via that type, and not type-punned. We don't want to imply any +//! restriction on load/store size, or any actual structure, with +//! untyped memory; it's just a blob. +//! +//! Eventually we plan to also have: +//! +//! - A dynamic array is a sequence of struct memory types, with a +//! length given by a global value (GV). This is useful to model, +//! e.g., tables. +//! +//! - A discriminated union is a union of several memory types +//! together with a tag field. This will be useful to model and +//! verify subtyping/downcasting for Wasm GC, among other uses. +//! +//! - Nullability on pointer fields: the fact will hold only if the +//! field is not null (all zero bits). + +use crate::ir::pcc::Fact; +use crate::ir::{GlobalValue, Type}; +use alloc::vec::Vec; + +#[cfg(feature = "enable-serde")] +use serde_derive::{Deserialize, Serialize}; + +/// Data defining a memory type. +/// +/// A memory type corresponds to a layout of data in memory. It may +/// have a statically-known or dynamically-known size. +#[derive(Clone, PartialEq, Hash)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub enum MemoryTypeData { + /// An aggregate consisting of certain fields at certain offsets. + /// + /// Fields must be sorted by offset, must be within the struct's + /// overall size, and must not overlap. These conditions are + /// checked by the CLIF verifier. + Struct { + /// Size of this type. + size: u64, + + /// Fields in this type. Sorted by offset. + fields: Vec, + }, + + /// A statically-sized untyped blob of memory. + Memory { + /// Accessible size. + size: u64, + }, + + /// A dynamically-sized untyped blob of memory, with bound given + /// by a global value plus some static amount. + DynamicMemory { + /// Static part of size. + size: u64, + /// Dynamic part of size. + gv: GlobalValue, + }, + + /// A type with no size. 
+ Empty, +} + +impl std::default::Default for MemoryTypeData { + fn default() -> Self { + Self::Empty + } +} + +impl std::fmt::Display for MemoryTypeData { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match self { + Self::Struct { size, fields } => { + write!(f, "struct {size} {{")?; + let mut first = true; + for field in fields { + if first { + first = false; + } else { + write!(f, ",")?; + } + write!(f, " {}: {}", field.offset, field.ty)?; + if field.readonly { + write!(f, " readonly")?; + } + if let Some(fact) = &field.fact { + write!(f, " ! {fact}")?; + } + } + write!(f, " }}")?; + Ok(()) + } + Self::Memory { size } => { + write!(f, "memory {size:#x}") + } + Self::DynamicMemory { size, gv } => { + write!(f, "dynamic_memory {gv}+{size:#x}") + } + Self::Empty => { + write!(f, "empty") + } + } + } +} + +/// One field in a memory type. +#[derive(Clone, PartialEq, Hash)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub struct MemoryTypeField { + /// The offset of this field in the memory type. + pub offset: u64, + /// The primitive type of the value in this field. Accesses to the + /// field must use this type (i.e., cannot bitcast/type-pun in + /// memory). + pub ty: Type, + /// A proof-carrying-code fact about this value, if any. + pub fact: Option, + /// Whether this field is read-only, i.e., stores should be + /// disallowed. + pub readonly: bool, +} + +impl MemoryTypeField { + /// Get the fact, if any, on a field. + pub fn fact(&self) -> Option<&Fact> { + self.fact.as_ref() + } +} + +impl MemoryTypeData { + /// Provide the static size of this type, if known. + /// + /// (The size may not be known for dynamically-sized arrays or + /// memories, when those memtype kinds are added.) + pub fn static_size(&self) -> Option { + match self { + Self::Struct { size, .. } => Some(*size), + Self::Memory { size } => Some(*size), + Self::DynamicMemory { .. } => None, + Self::Empty => Some(0), + } + } +} diff --git a/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/mod.rs b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/mod.rs new file mode 100644 index 000000000..e6f082d70 --- /dev/null +++ b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/mod.rs @@ -0,0 +1,110 @@ +//! Representation of Cranelift IR functions. 
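As a quick illustration of the memory-type definitions that just closed above, a hedged sketch (hypothetical usage, not part of the vendored crate; the printed forms follow the `Display` impl as vendored):

```rust
use cranelift_codegen::ir::{types, MemoryTypeData, MemoryTypeField};

fn main() {
    // A 16-byte struct with two 8-byte fields; the second is read-only.
    let ty = MemoryTypeData::Struct {
        size: 16,
        fields: vec![
            MemoryTypeField { offset: 0, ty: types::I64, fact: None, readonly: false },
            MemoryTypeField { offset: 8, ty: types::I64, fact: None, readonly: true },
        ],
    };
    assert_eq!(ty.static_size(), Some(16));
    assert_eq!(ty.to_string(), "struct 16 { 0: i64, 8: i64 readonly }");

    // An untyped 64-byte blob has a static size but no field structure.
    let blob = MemoryTypeData::Memory { size: 64 };
    assert_eq!(blob.to_string(), "memory 0x40");
}
```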
+ +mod atomic_rmw_op; +mod builder; +pub mod condcodes; +pub mod constant; +pub mod dfg; +pub mod dynamic_type; +pub mod entities; +mod extfunc; +mod extname; +pub mod function; +mod globalvalue; +pub mod immediates; +pub mod instructions; +pub mod jumptable; +pub(crate) mod known_symbol; +pub mod layout; +pub(crate) mod libcall; +mod memflags; +mod memtype; +pub mod pcc; +mod progpoint; +mod sourceloc; +pub mod stackslot; +mod trapcode; +pub mod types; +mod user_stack_maps; + +#[cfg(feature = "enable-serde")] +use serde_derive::{Deserialize, Serialize}; + +pub use crate::ir::atomic_rmw_op::AtomicRmwOp; +pub use crate::ir::builder::{ + InsertBuilder, InstBuilder, InstBuilderBase, InstInserterBase, ReplaceBuilder, +}; +pub use crate::ir::constant::{ConstantData, ConstantPool}; +pub use crate::ir::dfg::{BlockData, DataFlowGraph, ValueDef}; +pub use crate::ir::dynamic_type::{dynamic_to_fixed, DynamicTypeData, DynamicTypes}; +pub use crate::ir::entities::{ + Block, Constant, DynamicStackSlot, DynamicType, FuncRef, GlobalValue, Immediate, Inst, + JumpTable, MemoryType, SigRef, StackSlot, UserExternalNameRef, Value, +}; +pub use crate::ir::extfunc::{ + AbiParam, ArgumentExtension, ArgumentPurpose, ExtFuncData, Signature, +}; +pub use crate::ir::extname::{ExternalName, UserExternalName, UserFuncName}; +pub use crate::ir::function::Function; +pub use crate::ir::globalvalue::GlobalValueData; +pub use crate::ir::instructions::{ + BlockCall, InstructionData, Opcode, ValueList, ValueListPool, VariableArgs, +}; +pub use crate::ir::jumptable::JumpTableData; +pub use crate::ir::known_symbol::KnownSymbol; +pub use crate::ir::layout::Layout; +pub use crate::ir::libcall::{get_probestack_funcref, LibCall}; +pub use crate::ir::memflags::{AliasRegion, Endianness, MemFlags}; +pub use crate::ir::memtype::{MemoryTypeData, MemoryTypeField}; +pub use crate::ir::pcc::{BaseExpr, Expr, Fact, FactContext, PccError, PccResult}; +pub use crate::ir::progpoint::ProgramPoint; +pub use crate::ir::sourceloc::RelSourceLoc; +pub use crate::ir::sourceloc::SourceLoc; +pub use crate::ir::stackslot::{ + DynamicStackSlotData, DynamicStackSlots, StackSlotData, StackSlotKind, StackSlots, +}; +pub use crate::ir::trapcode::TrapCode; +pub use crate::ir::types::Type; +pub use crate::ir::user_stack_maps::{UserStackMap, UserStackMapEntry}; + +use crate::entity::{entity_impl, PrimaryMap, SecondaryMap}; + +/// Map of jump tables. +pub type JumpTables = PrimaryMap; + +/// Source locations for instructions. +pub(crate) type SourceLocs = SecondaryMap; + +/// Marked with a label value. +#[derive(Copy, Clone, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub struct ValueLabel(u32); +entity_impl!(ValueLabel, "val"); + +/// A label of a Value. +#[derive(Debug, Clone, PartialEq, Hash)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub struct ValueLabelStart { + /// Source location when it is in effect + pub from: RelSourceLoc, + + /// The label index. + pub label: ValueLabel, +} + +/// Value label assignments: label starts or value aliases. +#[derive(Debug, Clone, PartialEq, Hash)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub enum ValueLabelAssignments { + /// Original value labels assigned at transform. + Starts(alloc::vec::Vec), + + /// A value alias to original value. + Alias { + /// Source location when it is in effect + from: RelSourceLoc, + + /// The label index. 
+ value: Value, + }, +} diff --git a/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/pcc.rs b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/pcc.rs new file mode 100644 index 000000000..624e18753 --- /dev/null +++ b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/pcc.rs @@ -0,0 +1,1682 @@ +//! Proof-carrying code. We attach "facts" to values and then check +//! that they remain true after compilation. +//! +//! A few key design principle of this approach are: +//! +//! - The producer of the IR provides the axioms. All "ground truth", +//! such as what memory is accessible -- is meant to come by way of +//! facts on the function arguments and global values. In some +//! sense, all we are doing here is validating the "internal +//! consistency" of the facts that are provided on values, and the +//! actions performed on those values. +//! +//! - We do not derive and forward-propagate facts eagerly. Rather, +//! the producer needs to provide breadcrumbs -- a "proof witness" +//! of sorts -- to allow the checking to complete. That means that +//! as an address is computed, or pointer chains are dereferenced, +//! each intermediate value will likely have some fact attached. +//! +//! This does create more verbose IR, but a significant positive +//! benefit is that it avoids unnecessary work: we do not build up a +//! knowledge base that effectively encodes the integer ranges of +//! many or most values in the program. Rather, we only check +//! specifically the memory-access sequences. In practice, each such +//! sequence is likely to be a carefully-controlled sequence of IR +//! operations from, e.g., a sandboxing compiler (such as +//! Wasmtime) so adding annotations here to communicate +//! intent (ranges, bounds-checks, and the like) is no problem. +//! +//! Facts are attached to SSA values in CLIF, and are maintained +//! through optimizations and through lowering. They are thus also +//! present on VRegs in the VCode. In theory, facts could be checked +//! at either level, though in practice it is most useful to check +//! them at the VCode level if the goal is an end-to-end verification +//! of certain properties (e.g., memory sandboxing). +//! +//! Checking facts entails visiting each instruction that defines a +//! value with a fact, and checking the result's fact against the +//! facts on arguments and the operand. For VCode, this is +//! fundamentally a question of the target ISA's semantics, so we call +//! into the `LowerBackend` for this. Note that during checking there +//! is also limited forward propagation / inference, but only within +//! an instruction: for example, an addressing mode commonly can +//! include an addition, multiplication/shift, or extend operation, +//! and there is no way to attach facts to the intermediate values +//! "inside" the instruction, so instead the backend can use +//! `FactContext::add()` and friends to forward-propagate facts. +//! +//! TODO: +//! +//! Deployment: +//! - Add to fuzzing +//! - Turn on during wasm spec-tests +//! +//! More checks: +//! - Check that facts on `vmctx` GVs are subsumed by the actual facts +//! on the vmctx arg in block0 (function arg). +//! +//! Generality: +//! - facts on outputs (in func signature)? +//! - Implement checking at the CLIF level as well. +//! - Check instructions that can trap as well? +//! +//! Nicer errors: +//! - attach instruction index or some other identifier to errors +//! +//! Text format cleanup: +//! 
- make the bitwidth on `max` facts optional in the CLIF text +//! format? +//! - make offset in `mem` fact optional in the text format? +//! +//! Bikeshed colors (syntax): +//! - Put fact bang-annotations after types? +//! `v0: i64 ! fact(..)` vs. `v0 ! fact(..): i64` + +use crate::ir; +use crate::ir::types::*; +use crate::isa::TargetIsa; +use crate::machinst::{BlockIndex, LowerBackend, VCode}; +use crate::trace; +use regalloc2::Function as _; +use std::fmt; + +#[cfg(feature = "enable-serde")] +use serde_derive::{Deserialize, Serialize}; + +/// The result of checking proof-carrying-code facts. +pub type PccResult = std::result::Result; + +/// An error or inconsistency discovered when checking proof-carrying +/// code. +#[derive(Debug, Clone)] +pub enum PccError { + /// An operation wraps around, invalidating the stated value + /// range. + Overflow, + /// An input to an operator that produces a fact-annotated value + /// does not have a fact describing it, and one is needed. + MissingFact, + /// A derivation of an output fact is unsupported (incorrect or + /// not derivable). + UnsupportedFact, + /// A block parameter claims a fact that one of its predecessors + /// does not support. + UnsupportedBlockparam, + /// A memory access is out of bounds. + OutOfBounds, + /// Proof-carrying-code checking is not implemented for a + /// particular compiler backend. + UnimplementedBackend, + /// Proof-carrying-code checking is not implemented for a + /// particular instruction that instruction-selection chose. This + /// is an internal compiler error. + UnimplementedInst, + /// Access to an invalid or undefined field offset in a struct. + InvalidFieldOffset, + /// Access to a field via the wrong type. + BadFieldType, + /// Store to a read-only field. + WriteToReadOnlyField, + /// Store of data to a field with a fact that does not subsume the + /// field's fact. + InvalidStoredFact, +} + +/// A fact on a value. +#[derive(Clone, Debug, Hash, PartialEq, Eq)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub enum Fact { + /// A bitslice of a value (up to a bitwidth) is within the given + /// integer range. + /// + /// The slicing behavior is needed because this fact can describe + /// both an SSA `Value`, whose entire value is well-defined, and a + /// `VReg` in VCode, whose bits beyond the type stored in that + /// register are don't-care (undefined). + Range { + /// The bitwidth of bits we care about, from the LSB upward. + bit_width: u16, + /// The minimum value that the bitslice can take + /// (inclusive). The range is unsigned: the specified bits of + /// the actual value will be greater than or equal to this + /// value, as evaluated by an unsigned integer comparison. + min: u64, + /// The maximum value that the bitslice can take + /// (inclusive). The range is unsigned: the specified bits of + /// the actual value will be less than or equal to this value, + /// as evaluated by an unsigned integer comparison. + max: u64, + }, + + /// A value bounded by a global value. + /// + /// The range is in `(min_GV + min_offset)..(max_GV + + /// max_offset)`, inclusive on the lower and upper bound. + DynamicRange { + /// The bitwidth of bits we care about, from the LSB upward. + bit_width: u16, + /// The lower bound, inclusive. + min: Expr, + /// The upper bound, inclusive. + max: Expr, + }, + + /// A pointer to a memory type. + Mem { + /// The memory type. + ty: ir::MemoryType, + /// The minimum offset into the memory type, inclusive. 
+ min_offset: u64, + /// The maximum offset into the memory type, inclusive. + max_offset: u64, + /// This pointer can also be null. + nullable: bool, + }, + + /// A pointer to a memory type, dynamically bounded. The pointer + /// is within `(GV_min+offset_min)..(GV_max+offset_max)` + /// (inclusive on both ends) in the memory type. + DynamicMem { + /// The memory type. + ty: ir::MemoryType, + /// The lower bound, inclusive. + min: Expr, + /// The upper bound, inclusive. + max: Expr, + /// This pointer can also be null. + nullable: bool, + }, + + /// A definition of a value to be used as a symbol in + /// BaseExprs. There can only be one of these per value number. + /// + /// Note that this differs from a `DynamicRange` specifying that + /// some value in the program is the same as `value`. A `def(v1)` + /// fact is propagated to machine code and serves as a source of + /// truth: the value or location labeled with this fact *defines* + /// what `v1` is, and any `dynamic_range(64, v1, v1)`-labeled + /// values elsewhere are claiming to be equal to this value. + /// + /// This is necessary because we don't propagate SSA value labels + /// down to machine code otherwise; so when referring symbolically + /// to addresses and expressions derived from addresses, we need + /// to introduce the symbol first. + Def { + /// The SSA value this value defines. + value: ir::Value, + }, + + /// A comparison result between two dynamic values with a + /// comparison of a certain kind. + Compare { + /// The kind of comparison. + kind: ir::condcodes::IntCC, + /// The left-hand side of the comparison. + lhs: Expr, + /// The right-hand side of the comparison. + rhs: Expr, + }, + + /// A "conflict fact": this fact results from merging two other + /// facts, and it can never be satisfied -- checking any value + /// against this fact will fail. + Conflict, +} + +/// A bound expression. +#[derive(Clone, Debug, Hash, PartialEq, Eq)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub struct Expr { + /// The dynamic (base) part. + pub base: BaseExpr, + /// The static (offset) part. + pub offset: i64, +} + +/// The base part of a bound expression. +#[derive(Clone, Debug, Hash, PartialEq, Eq)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub enum BaseExpr { + /// No dynamic part (i.e., zero). + None, + /// A global value. + GlobalValue(ir::GlobalValue), + /// An SSA Value as a symbolic value. This can be referenced in + /// facts even after we've lowered out of SSA: it becomes simply + /// some symbolic value. + Value(ir::Value), + /// Top of the address space. This is "saturating": the offset + /// doesn't matter. + Max, +} + +impl BaseExpr { + /// Is one base less than or equal to another? (We can't always + /// know; in such cases, returns `false`.) + fn le(lhs: &BaseExpr, rhs: &BaseExpr) -> bool { + // (i) reflexivity; (ii) 0 <= x for all (unsigned) x; (iii) x <= max for all x. + lhs == rhs || *lhs == BaseExpr::None || *rhs == BaseExpr::Max + } + + /// Compute some BaseExpr that will be less than or equal to both + /// inputs. This is a generalization of `min` (but looser). + fn min(lhs: &BaseExpr, rhs: &BaseExpr) -> BaseExpr { + if lhs == rhs { + lhs.clone() + } else if *lhs == BaseExpr::Max { + rhs.clone() + } else if *rhs == BaseExpr::Max { + lhs.clone() + } else { + BaseExpr::None // zero is <= x for all (unsigned) x. + } + } + + /// Compute some BaseExpr that will be greater than or equal to + /// both inputs. 
+ fn max(lhs: &BaseExpr, rhs: &BaseExpr) -> BaseExpr { + if lhs == rhs { + lhs.clone() + } else if *lhs == BaseExpr::None { + rhs.clone() + } else if *rhs == BaseExpr::None { + lhs.clone() + } else { + BaseExpr::Max + } + } +} + +impl Expr { + /// Constant value. + pub fn constant(offset: i64) -> Self { + Expr { + base: BaseExpr::None, + offset, + } + } + + /// The value of an SSA value. + pub fn value(value: ir::Value) -> Self { + Expr { + base: BaseExpr::Value(value), + offset: 0, + } + } + + /// The value of a global value. + pub fn global_value(gv: ir::GlobalValue) -> Self { + Expr { + base: BaseExpr::GlobalValue(gv), + offset: 0, + } + } + + /// Is one expression definitely less than or equal to another? + /// (We can't always know; in such cases, returns `false`.) + fn le(lhs: &Expr, rhs: &Expr) -> bool { + if rhs.base == BaseExpr::Max { + true + } else { + BaseExpr::le(&lhs.base, &rhs.base) && lhs.offset <= rhs.offset + } + } + + /// Generalization of `min`: compute some Expr that is less than + /// or equal to both inputs. + fn min(lhs: &Expr, rhs: &Expr) -> Expr { + if lhs.base == BaseExpr::None && lhs.offset == 0 { + lhs.clone() + } else if rhs.base == BaseExpr::None && rhs.offset == 0 { + rhs.clone() + } else { + Expr { + base: BaseExpr::min(&lhs.base, &rhs.base), + offset: std::cmp::min(lhs.offset, rhs.offset), + } + } + } + + /// Generalization of `max`: compute some Expr that is greater + /// than or equal to both inputs. + fn max(lhs: &Expr, rhs: &Expr) -> Expr { + if lhs.base == BaseExpr::None && lhs.offset == 0 { + rhs.clone() + } else if rhs.base == BaseExpr::None && rhs.offset == 0 { + lhs.clone() + } else { + Expr { + base: BaseExpr::max(&lhs.base, &rhs.base), + offset: std::cmp::max(lhs.offset, rhs.offset), + } + } + } + + /// Add one expression to another. + fn add(lhs: &Expr, rhs: &Expr) -> Option { + if lhs.base == rhs.base { + Some(Expr { + base: lhs.base.clone(), + offset: lhs.offset.checked_add(rhs.offset)?, + }) + } else if lhs.base == BaseExpr::None { + Some(Expr { + base: rhs.base.clone(), + offset: lhs.offset.checked_add(rhs.offset)?, + }) + } else if rhs.base == BaseExpr::None { + Some(Expr { + base: lhs.base.clone(), + offset: lhs.offset.checked_add(rhs.offset)?, + }) + } else { + Some(Expr { + base: BaseExpr::Max, + offset: 0, + }) + } + } + + /// Add a static offset to an expression. + pub fn offset(lhs: &Expr, rhs: i64) -> Option { + let offset = lhs.offset.checked_add(rhs)?; + Some(Expr { + base: lhs.base.clone(), + offset, + }) + } + + /// Is this Expr a BaseExpr with no offset? Return it if so. + pub fn without_offset(&self) -> Option<&BaseExpr> { + if self.offset == 0 { + Some(&self.base) + } else { + None + } + } +} + +impl fmt::Display for BaseExpr { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + BaseExpr::None => Ok(()), + BaseExpr::Max => write!(f, "max"), + BaseExpr::GlobalValue(gv) => write!(f, "{gv}"), + BaseExpr::Value(value) => write!(f, "{value}"), + } + } +} + +impl BaseExpr { + /// Does this dynamic_expression take an offset? + pub fn is_some(&self) -> bool { + match self { + BaseExpr::None => false, + _ => true, + } + } +} + +impl fmt::Display for Expr { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.base)?; + match self.offset { + offset if offset > 0 && self.base.is_some() => write!(f, "+{offset:#x}"), + offset if offset > 0 => write!(f, "{offset:#x}"), + offset if offset < 0 => { + let negative_offset = -i128::from(offset); // upcast to support i64::MIN. 
+ write!(f, "-{negative_offset:#x}") + } + 0 if self.base.is_some() => Ok(()), + 0 => write!(f, "0"), + _ => unreachable!(), + } + } +} + +impl fmt::Display for Fact { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Fact::Range { + bit_width, + min, + max, + } => write!(f, "range({bit_width}, {min:#x}, {max:#x})"), + Fact::DynamicRange { + bit_width, + min, + max, + } => { + write!(f, "dynamic_range({bit_width}, {min}, {max})") + } + Fact::Mem { + ty, + min_offset, + max_offset, + nullable, + } => { + let nullable_flag = if *nullable { ", nullable" } else { "" }; + write!( + f, + "mem({ty}, {min_offset:#x}, {max_offset:#x}{nullable_flag})" + ) + } + Fact::DynamicMem { + ty, + min, + max, + nullable, + } => { + let nullable_flag = if *nullable { ", nullable" } else { "" }; + write!(f, "dynamic_mem({ty}, {min}, {max}{nullable_flag})") + } + Fact::Def { value } => write!(f, "def({value})"), + Fact::Compare { kind, lhs, rhs } => { + write!(f, "compare({kind}, {lhs}, {rhs})") + } + Fact::Conflict => write!(f, "conflict"), + } + } +} + +impl Fact { + /// Create a range fact that specifies a single known constant value. + pub fn constant(bit_width: u16, value: u64) -> Self { + debug_assert!(value <= max_value_for_width(bit_width)); + // `min` and `max` are inclusive, so this specifies a range of + // exactly one value. + Fact::Range { + bit_width, + min: value, + max: value, + } + } + + /// Create a dynamic range fact that points to the base of a dynamic memory. + pub fn dynamic_base_ptr(ty: ir::MemoryType) -> Self { + Fact::DynamicMem { + ty, + min: Expr::constant(0), + max: Expr::constant(0), + nullable: false, + } + } + + /// Create a fact that specifies the value is exactly an SSA value. + /// + /// Note that this differs from a `def` fact: it is not *defining* + /// a symbol to have the value that this fact is attached to; + /// rather it is claiming that this value is the same as whatever + /// that symbol is. (In other words, the def should be elsewhere, + /// and we are tying ourselves to it.) + pub fn value(bit_width: u16, value: ir::Value) -> Self { + Fact::DynamicRange { + bit_width, + min: Expr::value(value), + max: Expr::value(value), + } + } + + /// Create a fact that specifies the value is exactly an SSA value plus some offset. + pub fn value_offset(bit_width: u16, value: ir::Value, offset: i64) -> Self { + Fact::DynamicRange { + bit_width, + min: Expr::offset(&Expr::value(value), offset).unwrap(), + max: Expr::offset(&Expr::value(value), offset).unwrap(), + } + } + + /// Create a fact that specifies the value is exactly the value of a GV. + pub fn global_value(bit_width: u16, gv: ir::GlobalValue) -> Self { + Fact::DynamicRange { + bit_width, + min: Expr::global_value(gv), + max: Expr::global_value(gv), + } + } + + /// Create a fact that specifies the value is exactly the value of a GV plus some offset. + pub fn global_value_offset(bit_width: u16, gv: ir::GlobalValue, offset: i64) -> Self { + Fact::DynamicRange { + bit_width, + min: Expr::offset(&Expr::global_value(gv), offset).unwrap(), + max: Expr::offset(&Expr::global_value(gv), offset).unwrap(), + } + } + + /// Create a range fact that specifies the maximum range for a + /// value of the given bit-width. 
+ pub const fn max_range_for_width(bit_width: u16) -> Self { + match bit_width { + bit_width if bit_width < 64 => Fact::Range { + bit_width, + min: 0, + max: (1u64 << bit_width) - 1, + }, + 64 => Fact::Range { + bit_width: 64, + min: 0, + max: u64::MAX, + }, + _ => panic!("bit width too large!"), + } + } + + /// Create a range fact that specifies the maximum range for a + /// value of the given bit-width, zero-extended into a wider + /// width. + pub const fn max_range_for_width_extended(from_width: u16, to_width: u16) -> Self { + debug_assert!(from_width <= to_width); + match from_width { + from_width if from_width < 64 => Fact::Range { + bit_width: to_width, + min: 0, + max: (1u64 << from_width) - 1, + }, + 64 => Fact::Range { + bit_width: to_width, + min: 0, + max: u64::MAX, + }, + _ => panic!("bit width too large!"), + } + } + + /// Try to infer a minimal fact for a value of the given IR type. + pub fn infer_from_type(ty: ir::Type) -> Option<&'static Self> { + static FACTS: [Fact; 4] = [ + Fact::max_range_for_width(8), + Fact::max_range_for_width(16), + Fact::max_range_for_width(32), + Fact::max_range_for_width(64), + ]; + match ty { + I8 => Some(&FACTS[0]), + I16 => Some(&FACTS[1]), + I32 => Some(&FACTS[2]), + I64 => Some(&FACTS[3]), + _ => None, + } + } + + /// Does this fact "propagate" automatically, i.e., cause + /// instructions that process it to infer their own output facts? + /// Not all facts propagate automatically; otherwise, verification + /// would be much slower. + pub fn propagates(&self) -> bool { + match self { + Fact::Mem { .. } => true, + _ => false, + } + } + + /// Is this a constant value of the given bitwidth? Return it as a + /// `Some(value)` if so. + pub fn as_const(&self, bits: u16) -> Option { + match self { + Fact::Range { + bit_width, + min, + max, + } if *bit_width == bits && min == max => Some(*min), + _ => None, + } + } + + /// Is this fact a single-value range with a symbolic Expr? + pub fn as_symbol(&self) -> Option<&Expr> { + match self { + Fact::DynamicRange { min, max, .. } if min == max => Some(min), + _ => None, + } + } + + /// Merge two facts. We take the *intersection*: that is, we know + /// both facts to be true, so we can intersect ranges. (This + /// differs from the usual static analysis approach, where we are + /// merging multiple possibilities into a generalized / widened + /// fact. We want to narrow here.) 
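A hedged sketch of the intersection ("narrowing") semantics described above, using plain range facts:

```rust
use cranelift_codegen::ir::Fact;

fn main() {
    // Two overlapping claims about the same 64-bit value...
    let a = Fact::Range { bit_width: 64, min: 0x00, max: 0x40 };
    let b = Fact::Range { bit_width: 64, min: 0x10, max: 0x80 };

    // ...intersect to the narrowest range satisfying both.
    assert_eq!(
        Fact::intersect(&a, &b),
        Fact::Range { bit_width: 64, min: 0x10, max: 0x40 }
    );

    // Disjoint claims cannot both hold, so the merge is a conflict.
    let c = Fact::Range { bit_width: 64, min: 0x100, max: 0x200 };
    assert_eq!(Fact::intersect(&a, &c), Fact::Conflict);
}
```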
+ pub fn intersect(a: &Fact, b: &Fact) -> Fact { + match (a, b) { + ( + Fact::Range { + bit_width: bw_lhs, + min: min_lhs, + max: max_lhs, + }, + Fact::Range { + bit_width: bw_rhs, + min: min_rhs, + max: max_rhs, + }, + ) if bw_lhs == bw_rhs && max_lhs >= min_rhs && max_rhs >= min_lhs => Fact::Range { + bit_width: *bw_lhs, + min: std::cmp::max(*min_lhs, *min_rhs), + max: std::cmp::min(*max_lhs, *max_rhs), + }, + + ( + Fact::DynamicRange { + bit_width: bw_lhs, + min: min_lhs, + max: max_lhs, + }, + Fact::DynamicRange { + bit_width: bw_rhs, + min: min_rhs, + max: max_rhs, + }, + ) if bw_lhs == bw_rhs && Expr::le(min_rhs, max_lhs) && Expr::le(min_lhs, max_rhs) => { + Fact::DynamicRange { + bit_width: *bw_lhs, + min: Expr::max(min_lhs, min_rhs), + max: Expr::min(max_lhs, max_rhs), + } + } + + ( + Fact::Mem { + ty: ty_lhs, + min_offset: min_offset_lhs, + max_offset: max_offset_lhs, + nullable: nullable_lhs, + }, + Fact::Mem { + ty: ty_rhs, + min_offset: min_offset_rhs, + max_offset: max_offset_rhs, + nullable: nullable_rhs, + }, + ) if ty_lhs == ty_rhs + && max_offset_lhs >= min_offset_rhs + && max_offset_rhs >= min_offset_lhs => + { + Fact::Mem { + ty: *ty_lhs, + min_offset: std::cmp::max(*min_offset_lhs, *min_offset_rhs), + max_offset: std::cmp::min(*max_offset_lhs, *max_offset_rhs), + nullable: *nullable_lhs && *nullable_rhs, + } + } + + ( + Fact::DynamicMem { + ty: ty_lhs, + min: min_lhs, + max: max_lhs, + nullable: null_lhs, + }, + Fact::DynamicMem { + ty: ty_rhs, + min: min_rhs, + max: max_rhs, + nullable: null_rhs, + }, + ) if ty_lhs == ty_rhs && Expr::le(min_rhs, max_lhs) && Expr::le(min_lhs, max_rhs) => { + Fact::DynamicMem { + ty: *ty_lhs, + min: Expr::max(min_lhs, min_rhs), + max: Expr::min(max_lhs, max_rhs), + nullable: *null_lhs && *null_rhs, + } + } + + _ => Fact::Conflict, + } + } +} + +macro_rules! ensure { + ( $condition:expr, $err:tt $(,)? ) => { + if !$condition { + return Err(PccError::$err); + } + }; +} + +macro_rules! bail { + ( $err:tt ) => {{ + return Err(PccError::$err); + }}; +} + +/// The two kinds of inequalities: "strict" (`<`, `>`) and "loose" +/// (`<=`, `>=`), the latter of which admit equality. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum InequalityKind { + /// Strict inequality: {less,greater}-than. + Strict, + /// Loose inequality: {less,greater}-than-or-equal. + Loose, +} + +/// A "context" in which we can evaluate and derive facts. This +/// context carries environment/global properties, such as the machine +/// pointer width. +pub struct FactContext<'a> { + function: &'a ir::Function, + pointer_width: u16, +} + +impl<'a> FactContext<'a> { + /// Create a new "fact context" in which to evaluate facts. + pub fn new(function: &'a ir::Function, pointer_width: u16) -> Self { + FactContext { + function, + pointer_width, + } + } + + /// Computes whether `lhs` "subsumes" (implies) `rhs`. + pub fn subsumes(&self, lhs: &Fact, rhs: &Fact) -> bool { + match (lhs, rhs) { + // Reflexivity. + (l, r) if l == r => true, + + ( + Fact::Range { + bit_width: bw_lhs, + min: min_lhs, + max: max_lhs, + }, + Fact::Range { + bit_width: bw_rhs, + min: min_rhs, + max: max_rhs, + }, + ) => { + // If the bitwidths we're claiming facts about are the + // same, or the left-hand-side makes a claim about a + // wider bitwidth, and if the right-hand-side range is + // larger than the left-hand-side range, than the LHS + // subsumes the RHS. + // + // In other words, we can always expand the claimed + // possible value range. 
+ bw_lhs >= bw_rhs && max_lhs <= max_rhs && min_lhs >= min_rhs + } + + ( + Fact::DynamicRange { + bit_width: bw_lhs, + min: min_lhs, + max: max_lhs, + }, + Fact::DynamicRange { + bit_width: bw_rhs, + min: min_rhs, + max: max_rhs, + }, + ) => { + // Nearly same as above, but with dynamic-expression + // comparisons. Note that we require equal bitwidths + // here: unlike in the static case, we don't have + // fixed values for min and max, so we can't lean on + // the well-formedness requirements of the static + // ranges fitting within the bit-width max. + bw_lhs == bw_rhs && Expr::le(max_lhs, max_rhs) && Expr::le(min_rhs, min_lhs) + } + + ( + Fact::Mem { + ty: ty_lhs, + min_offset: min_offset_lhs, + max_offset: max_offset_lhs, + nullable: nullable_lhs, + }, + Fact::Mem { + ty: ty_rhs, + min_offset: min_offset_rhs, + max_offset: max_offset_rhs, + nullable: nullable_rhs, + }, + ) => { + ty_lhs == ty_rhs + && max_offset_lhs <= max_offset_rhs + && min_offset_lhs >= min_offset_rhs + && (*nullable_lhs || !*nullable_rhs) + } + + ( + Fact::DynamicMem { + ty: ty_lhs, + min: min_lhs, + max: max_lhs, + nullable: nullable_lhs, + }, + Fact::DynamicMem { + ty: ty_rhs, + min: min_rhs, + max: max_rhs, + nullable: nullable_rhs, + }, + ) => { + ty_lhs == ty_rhs + && Expr::le(max_lhs, max_rhs) + && Expr::le(min_rhs, min_lhs) + && (*nullable_lhs || !*nullable_rhs) + } + + // Constant zero subsumes nullable DynamicMem pointers. + ( + Fact::Range { + bit_width, + min: 0, + max: 0, + }, + Fact::DynamicMem { nullable: true, .. }, + ) if *bit_width == self.pointer_width => true, + + // Any fact subsumes a Def, because the Def makes no + // claims about the actual value (it ties a symbol to that + // value, but the value is fed to the symbol, not the + // other way around). + (_, Fact::Def { .. }) => true, + + _ => false, + } + } + + /// Computes whether the optional fact `lhs` subsumes (implies) + /// the optional fact `lhs`. A `None` never subsumes any fact, and + /// is always subsumed by any fact at all (or no fact). + pub fn subsumes_fact_optionals(&self, lhs: Option<&Fact>, rhs: Option<&Fact>) -> bool { + match (lhs, rhs) { + (None, None) => true, + (Some(_), None) => true, + (None, Some(_)) => false, + (Some(lhs), Some(rhs)) => self.subsumes(lhs, rhs), + } + } + + /// Computes whatever fact can be known about the sum of two + /// values with attached facts. The add is performed to the given + /// bit-width. Note that this is distinct from the machine or + /// pointer width: e.g., many 64-bit machines can still do 32-bit + /// adds that wrap at 2^32. 
+ pub fn add(&self, lhs: &Fact, rhs: &Fact, add_width: u16) -> Option { + let result = match (lhs, rhs) { + ( + Fact::Range { + bit_width: bw_lhs, + min: min_lhs, + max: max_lhs, + }, + Fact::Range { + bit_width: bw_rhs, + min: min_rhs, + max: max_rhs, + }, + ) if bw_lhs == bw_rhs && add_width >= *bw_lhs => { + let computed_min = min_lhs.checked_add(*min_rhs)?; + let computed_max = max_lhs.checked_add(*max_rhs)?; + let computed_max = std::cmp::min(max_value_for_width(add_width), computed_max); + Some(Fact::Range { + bit_width: *bw_lhs, + min: computed_min, + max: computed_max, + }) + } + + ( + Fact::Range { + bit_width: bw_max, + min, + max, + }, + Fact::Mem { + ty, + min_offset, + max_offset, + nullable, + }, + ) + | ( + Fact::Mem { + ty, + min_offset, + max_offset, + nullable, + }, + Fact::Range { + bit_width: bw_max, + min, + max, + }, + ) if *bw_max >= self.pointer_width + && add_width >= *bw_max + && (!*nullable || *max == 0) => + { + let min_offset = min_offset.checked_add(*min)?; + let max_offset = max_offset.checked_add(*max)?; + Some(Fact::Mem { + ty: *ty, + min_offset, + max_offset, + nullable: false, + }) + } + + ( + Fact::Range { + bit_width: bw_static, + min: min_static, + max: max_static, + }, + Fact::DynamicRange { + bit_width: bw_dynamic, + min: min_dynamic, + max: max_dynamic, + }, + ) + | ( + Fact::DynamicRange { + bit_width: bw_dynamic, + min: min_dynamic, + max: max_dynamic, + }, + Fact::Range { + bit_width: bw_static, + min: min_static, + max: max_static, + }, + ) if bw_static == bw_dynamic => { + let min = Expr::offset(min_dynamic, i64::try_from(*min_static).ok()?)?; + let max = Expr::offset(max_dynamic, i64::try_from(*max_static).ok()?)?; + Some(Fact::DynamicRange { + bit_width: *bw_dynamic, + min, + max, + }) + } + + ( + Fact::DynamicMem { + ty, + min: min_mem, + max: max_mem, + nullable: false, + }, + Fact::DynamicRange { + bit_width, + min: min_range, + max: max_range, + }, + ) + | ( + Fact::DynamicRange { + bit_width, + min: min_range, + max: max_range, + }, + Fact::DynamicMem { + ty, + min: min_mem, + max: max_mem, + nullable: false, + }, + ) if *bit_width == self.pointer_width => { + let min = Expr::add(min_mem, min_range)?; + let max = Expr::add(max_mem, max_range)?; + Some(Fact::DynamicMem { + ty: *ty, + min, + max, + nullable: false, + }) + } + + ( + Fact::Mem { + ty, + min_offset, + max_offset, + nullable: false, + }, + Fact::DynamicRange { + bit_width, + min: min_range, + max: max_range, + }, + ) + | ( + Fact::DynamicRange { + bit_width, + min: min_range, + max: max_range, + }, + Fact::Mem { + ty, + min_offset, + max_offset, + nullable: false, + }, + ) if *bit_width == self.pointer_width => { + let min = Expr::offset(min_range, i64::try_from(*min_offset).ok()?)?; + let max = Expr::offset(max_range, i64::try_from(*max_offset).ok()?)?; + Some(Fact::DynamicMem { + ty: *ty, + min, + max, + nullable: false, + }) + } + + ( + Fact::Range { + bit_width: bw_static, + min: min_static, + max: max_static, + }, + Fact::DynamicMem { + ty, + min: min_dynamic, + max: max_dynamic, + nullable, + }, + ) + | ( + Fact::DynamicMem { + ty, + min: min_dynamic, + max: max_dynamic, + nullable, + }, + Fact::Range { + bit_width: bw_static, + min: min_static, + max: max_static, + }, + ) if *bw_static == self.pointer_width && (!*nullable || *max_static == 0) => { + let min = Expr::offset(min_dynamic, i64::try_from(*min_static).ok()?)?; + let max = Expr::offset(max_dynamic, i64::try_from(*max_static).ok()?)?; + Some(Fact::DynamicMem { + ty: *ty, + min, + max, + nullable: false, + }) + } 
+ + _ => None, + }; + + trace!("add: {lhs:?} + {rhs:?} -> {result:?}"); + result + } + + /// Computes the `uextend` of a value with the given facts. + pub fn uextend(&self, fact: &Fact, from_width: u16, to_width: u16) -> Option { + if from_width == to_width { + return Some(fact.clone()); + } + + let result = match fact { + // If the claim is already for a same-or-wider value and the min + // and max are within range of the narrower value, we can + // claim the same range. + Fact::Range { + bit_width, + min, + max, + } if *bit_width >= from_width + && *min <= max_value_for_width(from_width) + && *max <= max_value_for_width(from_width) => + { + Some(Fact::Range { + bit_width: to_width, + min: *min, + max: *max, + }) + } + + // If the claim is a dynamic range for the from-width, we + // can extend to the to-width. + Fact::DynamicRange { + bit_width, + min, + max, + } if *bit_width == from_width => Some(Fact::DynamicRange { + bit_width: to_width, + min: min.clone(), + max: max.clone(), + }), + + // If the claim is a definition of a value, we can say + // that the output has a range of exactly that value. + Fact::Def { value } => Some(Fact::value(to_width, *value)), + + // Otherwise, we can at least claim that the value is + // within the range of `from_width`. + Fact::Range { .. } => Some(Fact::max_range_for_width_extended(from_width, to_width)), + + _ => None, + }; + trace!("uextend: fact {fact:?} from {from_width} to {to_width} -> {result:?}"); + result + } + + /// Computes the `sextend` of a value with the given facts. + pub fn sextend(&self, fact: &Fact, from_width: u16, to_width: u16) -> Option { + match fact { + // If we have a defined value in bits 0..bit_width, and + // the MSB w.r.t. `from_width` is *not* set, then we can + // do the same as `uextend`. + Fact::Range { + bit_width, + // We can ignore `min`: it is always <= max in + // unsigned terms, and we check max's LSB below. + min: _, + max, + } if *bit_width == from_width && (*max & (1 << (*bit_width - 1)) == 0) => { + self.uextend(fact, from_width, to_width) + } + _ => None, + } + } + + /// Computes the bit-truncation of a value with the given fact. + pub fn truncate(&self, fact: &Fact, from_width: u16, to_width: u16) -> Option { + if from_width == to_width { + return Some(fact.clone()); + } + + trace!( + "truncate: fact {:?} from {} to {}", + fact, + from_width, + to_width + ); + + match fact { + Fact::Range { + bit_width, + min, + max, + } if *bit_width == from_width => { + let max_val = (1u64 << to_width) - 1; + if *min <= max_val && *max <= max_val { + Some(Fact::Range { + bit_width: to_width, + min: *min, + max: *max, + }) + } else { + Some(Fact::Range { + bit_width: to_width, + min: 0, + max: max_val, + }) + } + } + _ => None, + } + } + + /// Scales a value with a fact by a known constant. + pub fn scale(&self, fact: &Fact, width: u16, factor: u32) -> Option { + let result = match fact { + x if factor == 1 => Some(x.clone()), + + Fact::Range { + bit_width, + min, + max, + } if *bit_width == width => { + let min = min.checked_mul(u64::from(factor))?; + let max = max.checked_mul(u64::from(factor))?; + if *bit_width < 64 && max > max_value_for_width(width) { + return None; + } + Some(Fact::Range { + bit_width: *bit_width, + min, + max, + }) + } + _ => None, + }; + trace!("scale: {fact:?} * {factor} at width {width} -> {result:?}"); + result + } + + /// Left-shifts a value with a fact by a known constant. 
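The derivation helpers above can be sketched the same way (same empty-function setup; hypothetical usage):

```rust
use cranelift_codegen::ir::{Fact, FactContext, Function};

fn main() {
    let func = Function::new();
    let ctx = FactContext::new(&func, 64);

    // Zero-extending an 8-bit value keeps its (small) range at the wider width.
    let byte = Fact::Range { bit_width: 8, min: 0, max: 0x7f };
    assert_eq!(
        ctx.uextend(&byte, 8, 64),
        Some(Fact::Range { bit_width: 64, min: 0, max: 0x7f })
    );

    // Scaling by a constant 4 (say, indexing 4-byte table entries) multiplies
    // both bounds.
    let index = Fact::Range { bit_width: 64, min: 0, max: 0x10 };
    assert_eq!(
        ctx.scale(&index, 64, 4),
        Some(Fact::Range { bit_width: 64, min: 0, max: 0x40 })
    );
}
```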
+ pub fn shl(&self, fact: &Fact, width: u16, amount: u16) -> Option { + if amount >= 32 { + return None; + } + let factor: u32 = 1 << amount; + self.scale(fact, width, factor) + } + + /// Offsets a value with a fact by a known amount. + pub fn offset(&self, fact: &Fact, width: u16, offset: i64) -> Option { + if offset == 0 { + return Some(fact.clone()); + } + + let compute_offset = |base: u64| -> Option { + if offset >= 0 { + base.checked_add(u64::try_from(offset).unwrap()) + } else { + base.checked_sub(u64::try_from(-offset).unwrap()) + } + }; + + let result = match fact { + Fact::Range { + bit_width, + min, + max, + } if *bit_width == width => { + let min = compute_offset(*min)?; + let max = compute_offset(*max)?; + Some(Fact::Range { + bit_width: *bit_width, + min, + max, + }) + } + Fact::DynamicRange { + bit_width, + min, + max, + } if *bit_width == width => { + let min = Expr::offset(min, offset)?; + let max = Expr::offset(max, offset)?; + Some(Fact::DynamicRange { + bit_width: *bit_width, + min, + max, + }) + } + Fact::Mem { + ty, + min_offset: mem_min_offset, + max_offset: mem_max_offset, + nullable: false, + } => { + let min_offset = compute_offset(*mem_min_offset)?; + let max_offset = compute_offset(*mem_max_offset)?; + Some(Fact::Mem { + ty: *ty, + min_offset, + max_offset, + nullable: false, + }) + } + Fact::DynamicMem { + ty, + min, + max, + nullable: false, + } => { + let min = Expr::offset(min, offset)?; + let max = Expr::offset(max, offset)?; + Some(Fact::DynamicMem { + ty: *ty, + min, + max, + nullable: false, + }) + } + _ => None, + }; + trace!("offset: {fact:?} + {offset} in width {width} -> {result:?}"); + result + } + + /// Check that accessing memory via a pointer with this fact, with + /// a memory access of the given size, is valid. + /// + /// If valid, returns the memory type and offset into that type + /// that this address accesses, if known, or `None` if the range + /// doesn't constrain the access to exactly one location. + fn check_address(&self, fact: &Fact, size: u32) -> PccResult> { + trace!("check_address: fact {:?} size {}", fact, size); + match fact { + Fact::Mem { + ty, + min_offset, + max_offset, + nullable: _, + } => { + let end_offset: u64 = max_offset + .checked_add(u64::from(size)) + .ok_or(PccError::Overflow)?; + match &self.function.memory_types[*ty] { + ir::MemoryTypeData::Struct { size, .. } + | ir::MemoryTypeData::Memory { size } => { + ensure!(end_offset <= *size, OutOfBounds) + } + ir::MemoryTypeData::DynamicMemory { .. } => bail!(OutOfBounds), + ir::MemoryTypeData::Empty => bail!(OutOfBounds), + } + let specific_ty_and_offset = if min_offset == max_offset { + Some((*ty, *min_offset)) + } else { + None + }; + Ok(specific_ty_and_offset) + } + Fact::DynamicMem { + ty, + min: _, + max: + Expr { + base: BaseExpr::GlobalValue(max_gv), + offset: max_offset, + }, + nullable: _, + } => match &self.function.memory_types[*ty] { + ir::MemoryTypeData::DynamicMemory { + gv, + size: mem_static_size, + } if gv == max_gv => { + let end_offset = max_offset + .checked_add(i64::from(size)) + .ok_or(PccError::Overflow)?; + let mem_static_size = + i64::try_from(*mem_static_size).map_err(|_| PccError::Overflow)?; + ensure!(end_offset <= mem_static_size, OutOfBounds); + Ok(None) + } + _ => bail!(OutOfBounds), + }, + _ => bail!(OutOfBounds), + } + } + + /// Get the access struct field, if any, by a pointer with the + /// given fact and an access of the given type. 
+ pub fn struct_field<'b>( + &'b self, + fact: &Fact, + access_ty: ir::Type, + ) -> PccResult> { + let (ty, offset) = match self.check_address(fact, access_ty.bytes())? { + Some((ty, offset)) => (ty, offset), + None => return Ok(None), + }; + + if let ir::MemoryTypeData::Struct { fields, .. } = &self.function.memory_types[ty] { + let field = fields + .iter() + .find(|field| field.offset == offset) + .ok_or(PccError::InvalidFieldOffset)?; + if field.ty != access_ty { + bail!(BadFieldType); + } + Ok(Some(field)) + } else { + // Access to valid memory, but not a struct: no facts can be attached to the result. + Ok(None) + } + } + + /// Check a load, and determine what fact, if any, the result of the load might have. + pub fn load<'b>(&'b self, fact: &Fact, access_ty: ir::Type) -> PccResult> { + Ok(self + .struct_field(fact, access_ty)? + .and_then(|field| field.fact())) + } + + /// Check a store. + pub fn store( + &self, + fact: &Fact, + access_ty: ir::Type, + data_fact: Option<&Fact>, + ) -> PccResult<()> { + if let Some(field) = self.struct_field(fact, access_ty)? { + // If it's a read-only field, disallow. + if field.readonly { + bail!(WriteToReadOnlyField); + } + // Check that the fact on the stored data subsumes the field's fact. + if !self.subsumes_fact_optionals(data_fact, field.fact()) { + bail!(InvalidStoredFact); + } + } + Ok(()) + } + + /// Apply a known inequality to rewrite dynamic bounds using transitivity, if possible. + /// + /// Given that `lhs >= rhs` (if not `strict`) or `lhs > rhs` (if + /// `strict`), update `fact`. + pub fn apply_inequality( + &self, + fact: &Fact, + lhs: &Fact, + rhs: &Fact, + kind: InequalityKind, + ) -> Fact { + let result = match ( + lhs.as_symbol(), + lhs.as_const(self.pointer_width) + .and_then(|k| i64::try_from(k).ok()), + rhs.as_symbol(), + fact, + ) { + ( + Some(lhs), + None, + Some(rhs), + Fact::DynamicMem { + ty, + min, + max, + nullable, + }, + ) if rhs.base == max.base => { + let strict_offset = match kind { + InequalityKind::Strict => 1, + InequalityKind::Loose => 0, + }; + if let Some(offset) = max + .offset + .checked_add(lhs.offset) + .and_then(|x| x.checked_sub(rhs.offset)) + .and_then(|x| x.checked_sub(strict_offset)) + { + let new_max = Expr { + base: lhs.base.clone(), + offset, + }; + Fact::DynamicMem { + ty: *ty, + min: min.clone(), + max: new_max, + nullable: *nullable, + } + } else { + fact.clone() + } + } + + ( + None, + Some(lhs_const), + Some(rhs), + Fact::DynamicMem { + ty, + min: _, + max, + nullable, + }, + ) if rhs.base == max.base => { + let strict_offset = match kind { + InequalityKind::Strict => 1, + InequalityKind::Loose => 0, + }; + if let Some(offset) = max + .offset + .checked_add(lhs_const) + .and_then(|x| x.checked_sub(rhs.offset)) + .and_then(|x| x.checked_sub(strict_offset)) + { + Fact::Mem { + ty: *ty, + min_offset: 0, + max_offset: u64::try_from(offset).unwrap_or(0), + nullable: *nullable, + } + } else { + fact.clone() + } + } + + _ => fact.clone(), + }; + trace!("apply_inequality({fact:?}, {lhs:?}, {rhs:?}, {kind:?} -> {result:?}"); + result + } + + /// Compute the union of two facts, if possible. 
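+    ///
+    /// Illustrative sketch (added commentary; `ctx` and `ty` are assumed
+    /// bindings): one case handled below is merging a known-null pointer (a
+    /// constant-zero range at pointer width) with a memory fact, which yields
+    /// the same memory fact marked nullable.
+    ///
+    /// ```ignore
+    /// let null = Fact::Range { bit_width: 64, min: 0, max: 0 }; // assumes a 64-bit pointer width
+    /// let mem = Fact::Mem { ty, min_offset: 0, max_offset: 16, nullable: false };
+    /// // ctx.union(&null, &mem)
+    /// //     == Some(Fact::Mem { ty, min_offset: 0, max_offset: 16, nullable: true })
+    /// ```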
+ pub fn union(&self, lhs: &Fact, rhs: &Fact) -> Option { + let result = match (lhs, rhs) { + (lhs, rhs) if lhs == rhs => Some(lhs.clone()), + + ( + Fact::DynamicMem { + ty: ty_lhs, + min: min_lhs, + max: max_lhs, + nullable: nullable_lhs, + }, + Fact::DynamicMem { + ty: ty_rhs, + min: min_rhs, + max: max_rhs, + nullable: nullable_rhs, + }, + ) if ty_lhs == ty_rhs => Some(Fact::DynamicMem { + ty: *ty_lhs, + min: Expr::min(min_lhs, min_rhs), + max: Expr::max(max_lhs, max_rhs), + nullable: *nullable_lhs || *nullable_rhs, + }), + + ( + Fact::Range { + bit_width: bw_const, + min: 0, + max: 0, + }, + Fact::DynamicMem { + ty, + min, + max, + nullable: _, + }, + ) + | ( + Fact::DynamicMem { + ty, + min, + max, + nullable: _, + }, + Fact::Range { + bit_width: bw_const, + min: 0, + max: 0, + }, + ) if *bw_const == self.pointer_width => Some(Fact::DynamicMem { + ty: *ty, + min: min.clone(), + max: max.clone(), + nullable: true, + }), + + ( + Fact::Range { + bit_width: bw_const, + min: 0, + max: 0, + }, + Fact::Mem { + ty, + min_offset, + max_offset, + nullable: _, + }, + ) + | ( + Fact::Mem { + ty, + min_offset, + max_offset, + nullable: _, + }, + Fact::Range { + bit_width: bw_const, + min: 0, + max: 0, + }, + ) if *bw_const == self.pointer_width => Some(Fact::Mem { + ty: *ty, + min_offset: *min_offset, + max_offset: *max_offset, + nullable: true, + }), + + _ => None, + }; + trace!("union({lhs:?}, {rhs:?}) -> {result:?}"); + result + } +} + +fn max_value_for_width(bits: u16) -> u64 { + assert!(bits <= 64); + if bits == 64 { + u64::MAX + } else { + (1u64 << bits) - 1 + } +} + +/// Top-level entry point after compilation: this checks the facts in +/// VCode. +pub fn check_vcode_facts( + f: &ir::Function, + vcode: &mut VCode, + backend: &B, +) -> PccResult<()> { + let ctx = FactContext::new(f, backend.triple().pointer_width().unwrap().bits().into()); + + // Check that individual instructions are valid according to input + // facts, and support the stated output facts. + for block in 0..vcode.num_blocks() { + let block = BlockIndex::new(block); + let mut flow_state = B::FactFlowState::default(); + for inst in vcode.block_insns(block).iter() { + // Check any output facts on this inst. + if let Err(e) = backend.check_fact(&ctx, vcode, inst, &mut flow_state) { + log::info!("Error checking instruction: {:?}", vcode[inst]); + return Err(e); + } + + // If this is a branch, check that all block arguments subsume + // the assumed facts on the blockparams of successors. + if vcode.is_branch(inst) { + for (succ_idx, succ) in vcode.block_succs(block).iter().enumerate() { + for (arg, param) in vcode + .branch_blockparams(block, inst, succ_idx) + .iter() + .zip(vcode.block_params(*succ).iter()) + { + let arg_fact = vcode.vreg_fact(*arg); + let param_fact = vcode.vreg_fact(*param); + if !ctx.subsumes_fact_optionals(arg_fact, param_fact) { + return Err(PccError::UnsupportedBlockparam); + } + } + } + } + } + } + Ok(()) +} diff --git a/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/progpoint.rs b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/progpoint.rs new file mode 100644 index 000000000..84d394276 --- /dev/null +++ b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/progpoint.rs @@ -0,0 +1,75 @@ +//! Program points. + +use crate::ir::{Block, Inst}; +use core::fmt; + +/// A `ProgramPoint` represents a position in a function where the live range of an SSA value can +/// begin or end. It can be either: +/// +/// 1. An instruction or +/// 2. A block header. 
+/// +/// This corresponds more or less to the lines in the textual form of Cranelift IR. +#[derive(PartialEq, Eq, Clone, Copy)] +pub enum ProgramPoint { + /// An instruction in the function. + Inst(Inst), + /// A block header. + Block(Block), +} + +impl ProgramPoint { + /// Get the instruction we know is inside. + pub fn unwrap_inst(self) -> Inst { + match self { + Self::Inst(x) => x, + Self::Block(x) => panic!("expected inst: {x}"), + } + } +} + +impl From for ProgramPoint { + fn from(inst: Inst) -> Self { + Self::Inst(inst) + } +} + +impl From for ProgramPoint { + fn from(block: Block) -> Self { + Self::Block(block) + } +} + +impl fmt::Display for ProgramPoint { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match *self { + Self::Inst(x) => write!(f, "{x}"), + Self::Block(x) => write!(f, "{x}"), + } + } +} + +impl fmt::Debug for ProgramPoint { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "ProgramPoint({self})") + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::entity::EntityRef; + use alloc::string::ToString; + + #[test] + fn convert() { + let i5 = Inst::new(5); + let b3 = Block::new(3); + + let pp1: ProgramPoint = i5.into(); + let pp2: ProgramPoint = b3.into(); + + assert_eq!(pp1.to_string(), "inst5"); + assert_eq!(pp2.to_string(), "block3"); + } +} diff --git a/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/sourceloc.rs b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/sourceloc.rs new file mode 100644 index 000000000..6704574d0 --- /dev/null +++ b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/sourceloc.rs @@ -0,0 +1,117 @@ +//! Source locations. +//! +//! Cranelift tracks the original source location of each instruction, and preserves the source +//! location when instructions are transformed. + +use core::fmt; +#[cfg(feature = "enable-serde")] +use serde_derive::{Deserialize, Serialize}; + +/// A source location. +/// +/// This is an opaque 32-bit number attached to each Cranelift IR instruction. Cranelift does not +/// interpret source locations in any way, they are simply preserved from the input to the output. +/// +/// The default source location uses the all-ones bit pattern `!0`. It is used for instructions +/// that can't be given a real source location. +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub struct SourceLoc(u32); + +impl SourceLoc { + /// Create a new source location with the given bits. + pub fn new(bits: u32) -> Self { + Self(bits) + } + + /// Is this the default source location? + pub fn is_default(self) -> bool { + self == Default::default() + } + + /// Read the bits of this source location. + pub fn bits(self) -> u32 { + self.0 + } +} + +impl Default for SourceLoc { + fn default() -> Self { + Self(!0) + } +} + +impl fmt::Display for SourceLoc { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + if self.is_default() { + write!(f, "@-") + } else { + write!(f, "@{:04x}", self.0) + } + } +} + +/// Source location relative to another base source location. +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub struct RelSourceLoc(u32); + +impl RelSourceLoc { + /// Create a new relative source location with the given bits. + pub fn new(bits: u32) -> Self { + Self(bits) + } + + /// Creates a new `RelSourceLoc` based on the given base and offset. 
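+    ///
+    /// Illustrative example (added commentary): the relative location
+    /// round-trips through `expand` with the same base.
+    ///
+    /// ```ignore
+    /// let base = SourceLoc::new(0x10);
+    /// let loc = SourceLoc::new(0x14);
+    /// let rel = RelSourceLoc::from_base_offset(base, loc); // stores the delta, 4
+    /// assert_eq!(rel.expand(base), loc);
+    /// ```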
+ pub fn from_base_offset(base: SourceLoc, offset: SourceLoc) -> Self { + if base.is_default() || offset.is_default() { + Self::default() + } else { + Self(offset.bits().wrapping_sub(base.bits())) + } + } + + /// Expands the relative source location into an absolute one, using the given base. + pub fn expand(&self, base: SourceLoc) -> SourceLoc { + if self.is_default() || base.is_default() { + Default::default() + } else { + SourceLoc::new(self.0.wrapping_add(base.bits())) + } + } + + /// Is this the default relative source location? + pub fn is_default(self) -> bool { + self == Default::default() + } +} + +impl Default for RelSourceLoc { + fn default() -> Self { + Self(!0) + } +} + +impl fmt::Display for RelSourceLoc { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + if self.is_default() { + write!(f, "@-") + } else { + write!(f, "@+{:04x}", self.0) + } + } +} + +#[cfg(test)] +mod tests { + use crate::ir::SourceLoc; + use alloc::string::ToString; + + #[test] + fn display() { + assert_eq!(SourceLoc::default().to_string(), "@-"); + assert_eq!(SourceLoc::new(0).to_string(), "@0000"); + assert_eq!(SourceLoc::new(16).to_string(), "@0010"); + assert_eq!(SourceLoc::new(0xabcdef).to_string(), "@abcdef"); + } +} diff --git a/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/stackslot.rs b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/stackslot.rs new file mode 100644 index 000000000..d906fb291 --- /dev/null +++ b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/stackslot.rs @@ -0,0 +1,208 @@ +//! Stack slots. +//! +//! The `StackSlotData` struct keeps track of a single stack slot in a function. +//! + +use crate::entity::PrimaryMap; +use crate::ir::entities::{DynamicStackSlot, DynamicType}; +use crate::ir::StackSlot; +use core::fmt; +use core::str::FromStr; + +/// imports only needed for testing. +#[allow(unused_imports)] +use crate::ir::{DynamicTypeData, GlobalValueData}; + +#[allow(unused_imports)] +use crate::ir::types::*; + +#[cfg(feature = "enable-serde")] +use serde_derive::{Deserialize, Serialize}; + +/// The size of an object on the stack, or the size of a stack frame. +/// +/// We don't use `usize` to represent object sizes on the target platform because Cranelift supports +/// cross-compilation, and `usize` is a type that depends on the host platform, not the target +/// platform. +pub type StackSize = u32; + +/// The kind of a stack slot. +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub enum StackSlotKind { + /// An explicit stack slot. This is a chunk of stack memory for use by the `stack_load` + /// and `stack_store` instructions. + ExplicitSlot, + /// An explicit stack slot for dynamic vector types. This is a chunk of stack memory + /// for use by the `dynamic_stack_load` and `dynamic_stack_store` instructions. + ExplicitDynamicSlot, +} + +impl FromStr for StackSlotKind { + type Err = (); + + fn from_str(s: &str) -> Result { + use self::StackSlotKind::*; + match s { + "explicit_slot" => Ok(ExplicitSlot), + "explicit_dynamic_slot" => Ok(ExplicitDynamicSlot), + _ => Err(()), + } + } +} + +impl fmt::Display for StackSlotKind { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + use self::StackSlotKind::*; + f.write_str(match *self { + ExplicitSlot => "explicit_slot", + ExplicitDynamicSlot => "explicit_dynamic_slot", + }) + } +} + +/// Contents of a stack slot. 
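+///
+/// Illustrative example (added commentary): the textual form produced by
+/// `Display` includes the alignment only when `align_shift` is non-zero.
+///
+/// ```ignore
+/// let slot = StackSlotData::new(StackSlotKind::ExplicitSlot, 8, 4);
+/// assert_eq!(slot.to_string(), "explicit_slot 8, align = 16");
+/// ```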
+#[derive(Clone, Debug, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub struct StackSlotData { + /// The kind of stack slot. + pub kind: StackSlotKind, + + /// Size of stack slot in bytes. + pub size: StackSize, + + /// Alignment of stack slot as a power-of-two exponent (log2 + /// value). The stack slot will be at least this aligned; it may + /// be aligned according to other considerations, such as minimum + /// stack slot size or machine word size, as well. + pub align_shift: u8, +} + +impl StackSlotData { + /// Create a stack slot with the specified byte size and alignment. + pub fn new(kind: StackSlotKind, size: StackSize, align_shift: u8) -> Self { + Self { + kind, + size, + align_shift, + } + } +} + +impl fmt::Display for StackSlotData { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + if self.align_shift != 0 { + write!( + f, + "{} {}, align = {}", + self.kind, + self.size, + 1u32 << self.align_shift + ) + } else { + write!(f, "{} {}", self.kind, self.size) + } + } +} + +/// Contents of a dynamic stack slot. +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub struct DynamicStackSlotData { + /// The kind of stack slot. + pub kind: StackSlotKind, + + /// The type of this slot. + pub dyn_ty: DynamicType, +} + +impl DynamicStackSlotData { + /// Create a stack slot with the specified byte size. + pub fn new(kind: StackSlotKind, dyn_ty: DynamicType) -> Self { + assert!(kind == StackSlotKind::ExplicitDynamicSlot); + Self { kind, dyn_ty } + } + + /// Get the alignment in bytes of this stack slot given the stack pointer alignment. + pub fn alignment(&self, max_align: StackSize) -> StackSize { + debug_assert!(max_align.is_power_of_two()); + max_align + } +} + +impl fmt::Display for DynamicStackSlotData { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{} {}", self.kind, self.dyn_ty) + } +} + +/// All allocated stack slots. +pub type StackSlots = PrimaryMap; + +/// All allocated dynamic stack slots. 
+pub type DynamicStackSlots = PrimaryMap; + +#[cfg(test)] +mod tests { + use super::*; + use crate::ir::Function; + use alloc::string::ToString; + + #[test] + fn stack_slot() { + let mut func = Function::new(); + + let ss0 = + func.create_sized_stack_slot(StackSlotData::new(StackSlotKind::ExplicitSlot, 4, 0)); + let ss1 = + func.create_sized_stack_slot(StackSlotData::new(StackSlotKind::ExplicitSlot, 8, 0)); + assert_eq!(ss0.to_string(), "ss0"); + assert_eq!(ss1.to_string(), "ss1"); + + assert_eq!(func.sized_stack_slots[ss0].size, 4); + assert_eq!(func.sized_stack_slots[ss1].size, 8); + + assert_eq!(func.sized_stack_slots[ss0].to_string(), "explicit_slot 4"); + assert_eq!(func.sized_stack_slots[ss1].to_string(), "explicit_slot 8"); + } + + #[test] + fn dynamic_stack_slot() { + let mut func = Function::new(); + + let int_vector_ty = I32X4; + let fp_vector_ty = F64X2; + let scale0 = GlobalValueData::DynScaleTargetConst { + vector_type: int_vector_ty, + }; + let scale1 = GlobalValueData::DynScaleTargetConst { + vector_type: fp_vector_ty, + }; + let gv0 = func.create_global_value(scale0); + let gv1 = func.create_global_value(scale1); + let dtd0 = DynamicTypeData::new(int_vector_ty, gv0); + let dtd1 = DynamicTypeData::new(fp_vector_ty, gv1); + let dt0 = func.dfg.make_dynamic_ty(dtd0); + let dt1 = func.dfg.make_dynamic_ty(dtd1); + + let dss0 = func.create_dynamic_stack_slot(DynamicStackSlotData::new( + StackSlotKind::ExplicitDynamicSlot, + dt0, + )); + let dss1 = func.create_dynamic_stack_slot(DynamicStackSlotData::new( + StackSlotKind::ExplicitDynamicSlot, + dt1, + )); + assert_eq!(dss0.to_string(), "dss0"); + assert_eq!(dss1.to_string(), "dss1"); + + assert_eq!( + func.dynamic_stack_slots[dss0].to_string(), + "explicit_dynamic_slot dt0" + ); + assert_eq!( + func.dynamic_stack_slots[dss1].to_string(), + "explicit_dynamic_slot dt1" + ); + } +} diff --git a/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/trapcode.rs b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/trapcode.rs new file mode 100644 index 000000000..e3fbcfbb4 --- /dev/null +++ b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/trapcode.rs @@ -0,0 +1,147 @@ +//! Trap codes describing the reason for a trap. + +use core::fmt::{self, Display, Formatter}; +use core::num::NonZeroU8; +use core::str::FromStr; +#[cfg(feature = "enable-serde")] +use serde_derive::{Deserialize, Serialize}; + +/// A trap code describing the reason for a trap. +/// +/// All trap instructions have an explicit trap code. +#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub struct TrapCode(NonZeroU8); + +impl TrapCode { + /// Number of reserved opcodes for Cranelift itself. This number of traps are + /// defined below starting at the high end of the byte space (e.g. 255, 254, + /// ...) + const RESERVED: u8 = 5; + const RESERVED_START: u8 = u8::MAX - Self::RESERVED + 1; + + /// Internal helper to create new reserved trap codes. + const fn reserved(byte: u8) -> TrapCode { + if let Some(code) = byte.checked_add(Self::RESERVED_START) { + if let Some(nz) = NonZeroU8::new(code) { + return TrapCode(nz); + } + } + panic!("invalid reserved opcode") + } + + /// The current stack space was exhausted. + pub const STACK_OVERFLOW: TrapCode = TrapCode::reserved(0); + /// An integer arithmetic operation caused an overflow. + pub const INTEGER_OVERFLOW: TrapCode = TrapCode::reserved(1); + /// A `heap_addr` instruction detected an out-of-bounds error. 
+ /// + /// Note that not all out-of-bounds heap accesses are reported this way; + /// some are detected by a segmentation fault on the heap unmapped or + /// offset-guard pages. + pub const HEAP_OUT_OF_BOUNDS: TrapCode = TrapCode::reserved(2); + + /// An integer division by zero. + pub const INTEGER_DIVISION_BY_ZERO: TrapCode = TrapCode::reserved(3); + + /// Failed float-to-int conversion. + pub const BAD_CONVERSION_TO_INTEGER: TrapCode = TrapCode::reserved(4); + + /// Create a user-defined trap code. + /// + /// Returns `None` if `code` is zero or too large and is reserved by + /// Cranelift. + pub const fn user(code: u8) -> Option { + if code >= Self::RESERVED_START { + return None; + } + match NonZeroU8::new(code) { + Some(nz) => Some(TrapCode(nz)), + None => None, + } + } + + /// Alias for [`TrapCode::user`] with a panic built-in. + pub const fn unwrap_user(code: u8) -> TrapCode { + match TrapCode::user(code) { + Some(code) => code, + None => panic!("invalid user trap code"), + } + } + + /// Returns the raw byte representing this trap. + pub const fn as_raw(&self) -> NonZeroU8 { + self.0 + } + + /// Creates a trap code from its raw byte, likely returned by + /// [`TrapCode::as_raw`] previously. + pub const fn from_raw(byte: NonZeroU8) -> TrapCode { + TrapCode(byte) + } + + /// Returns a slice of all traps except `TrapCode::User` traps + pub const fn non_user_traps() -> &'static [TrapCode] { + &[ + TrapCode::STACK_OVERFLOW, + TrapCode::HEAP_OUT_OF_BOUNDS, + TrapCode::INTEGER_OVERFLOW, + TrapCode::INTEGER_DIVISION_BY_ZERO, + TrapCode::BAD_CONVERSION_TO_INTEGER, + ] + } +} + +impl Display for TrapCode { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + let identifier = match *self { + Self::STACK_OVERFLOW => "stk_ovf", + Self::HEAP_OUT_OF_BOUNDS => "heap_oob", + Self::INTEGER_OVERFLOW => "int_ovf", + Self::INTEGER_DIVISION_BY_ZERO => "int_divz", + Self::BAD_CONVERSION_TO_INTEGER => "bad_toint", + TrapCode(x) => return write!(f, "user{x}"), + }; + f.write_str(identifier) + } +} + +impl FromStr for TrapCode { + type Err = (); + + fn from_str(s: &str) -> Result { + match s { + "stk_ovf" => Ok(Self::STACK_OVERFLOW), + "heap_oob" => Ok(Self::HEAP_OUT_OF_BOUNDS), + "int_ovf" => Ok(Self::INTEGER_OVERFLOW), + "int_divz" => Ok(Self::INTEGER_DIVISION_BY_ZERO), + "bad_toint" => Ok(Self::BAD_CONVERSION_TO_INTEGER), + _ if s.starts_with("user") => { + let num = s[4..].parse().map_err(|_| ())?; + TrapCode::user(num).ok_or(()) + } + _ => Err(()), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use alloc::string::ToString; + + #[test] + fn display() { + for r in TrapCode::non_user_traps() { + let tc = *r; + assert_eq!(tc.to_string().parse(), Ok(tc)); + } + assert_eq!("bogus".parse::(), Err(())); + + assert_eq!(TrapCode::unwrap_user(17).to_string(), "user17"); + assert_eq!("user22".parse(), Ok(TrapCode::unwrap_user(22))); + assert_eq!("user".parse::(), Err(())); + assert_eq!("user-1".parse::(), Err(())); + assert_eq!("users".parse::(), Err(())); + } +} diff --git a/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/types.rs b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/types.rs new file mode 100644 index 000000000..bc7704cf0 --- /dev/null +++ b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/types.rs @@ -0,0 +1,624 @@ +//! Common types for the Cranelift code generator. 
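+//!
+//! For example (added, illustrative):
+//!
+//! ```ignore
+//! use cranelift_codegen::ir::types::*;
+//! assert_eq!(I32.by(4), Some(I32X4));   // build a 4-lane i32 vector type
+//! assert_eq!(I32X4.lane_type(), I32);   // and recover its lane type
+//! ```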
+ +use core::fmt::{self, Debug, Display, Formatter}; +use cranelift_codegen_shared::constants; +#[cfg(feature = "enable-serde")] +use serde_derive::{Deserialize, Serialize}; +use target_lexicon::{PointerWidth, Triple}; + +/// The type of an SSA value. +/// +/// The `INVALID` type isn't a real type, and is used as a placeholder in the IR where a type +/// field is present put no type is needed, such as the controlling type variable for a +/// non-polymorphic instruction. +/// +/// Basic integer types: `I8`, `I16`, `I32`, `I64`, and `I128`. These types are sign-agnostic. +/// +/// Basic floating point types: `F16`, `F32`, `F64`, and `F128`. IEEE half, single, double, and quadruple precision. +/// +/// SIMD vector types have power-of-two lanes, up to 256. Lanes can be any int/float type. +/// +/// Note that this is encoded in a `u16` currently for extensibility, +/// but allows only 14 bits to be used due to some bitpacking tricks +/// in the CLIF data structures. +#[derive(Copy, Clone, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub struct Type(u16); + +/// Not a valid type. Can't be loaded or stored. Can't be part of a SIMD vector. +pub const INVALID: Type = Type(0); + +// Include code generated by `cranelift-codegen/meta/gen_types.rs`. This file contains constant +// definitions for all the scalar types as well as common vector types for 64, 128, 256, and +// 512-bit SIMD vectors. +include!(concat!(env!("OUT_DIR"), "/types.rs")); + +impl Type { + /// Get the lane type of this SIMD vector type. + /// + /// A lane type is the same as a SIMD vector type with one lane, so it returns itself. + pub fn lane_type(self) -> Self { + if self.0 < constants::VECTOR_BASE { + self + } else { + Self(constants::LANE_BASE | (self.0 & 0x0f)) + } + } + + /// The type transformation that returns the lane type of a type variable; it is just a + /// renaming of lane_type() to be used in context where we think in terms of type variable + /// transformations. + pub fn lane_of(self) -> Self { + self.lane_type() + } + + /// Get log_2 of the number of bits in a lane. + pub fn log2_lane_bits(self) -> u32 { + match self.lane_type() { + I8 => 3, + I16 | F16 => 4, + I32 | F32 => 5, + I64 | F64 => 6, + I128 | F128 => 7, + _ => 0, + } + } + + /// Get the number of bits in a lane. + pub fn lane_bits(self) -> u32 { + match self.lane_type() { + I8 => 8, + I16 | F16 => 16, + I32 | F32 => 32, + I64 | F64 => 64, + I128 | F128 => 128, + _ => 0, + } + } + + /// Get the (minimum, maximum) values represented by each lane in the type. + /// Note that these are returned as unsigned 'bit patterns'. + pub fn bounds(self, signed: bool) -> (u128, u128) { + if signed { + match self.lane_type() { + I8 => (i8::MIN as u128, i8::MAX as u128), + I16 => (i16::MIN as u128, i16::MAX as u128), + I32 => (i32::MIN as u128, i32::MAX as u128), + I64 => (i64::MIN as u128, i64::MAX as u128), + I128 => (i128::MIN as u128, i128::MAX as u128), + _ => unimplemented!(), + } + } else { + match self.lane_type() { + I8 => (u8::MIN as u128, u8::MAX as u128), + I16 => (u16::MIN as u128, u16::MAX as u128), + I32 => (u32::MIN as u128, u32::MAX as u128), + I64 => (u64::MIN as u128, u64::MAX as u128), + I128 => (u128::MIN, u128::MAX), + _ => unimplemented!(), + } + } + } + + /// Get an integer type with the requested number of bits. + /// + /// For the same thing but in *bytes*, use [`Self::int_with_byte_size`]. 
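+    ///
+    /// For example (added, illustrative):
+    ///
+    /// ```ignore
+    /// assert_eq!(Type::int(32), Some(I32));
+    /// assert_eq!(Type::int(33), None); // only 8/16/32/64/128 are valid widths
+    /// ```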
+ pub fn int(bits: u16) -> Option { + match bits { + 8 => Some(I8), + 16 => Some(I16), + 32 => Some(I32), + 64 => Some(I64), + 128 => Some(I128), + _ => None, + } + } + + /// Get an integer type with the requested number of bytes. + /// + /// For the same thing but in *bits*, use [`Self::int`]. + pub fn int_with_byte_size(bytes: u16) -> Option { + Self::int(bytes.checked_mul(8)?) + } + + /// Get a type with the same number of lanes as `self`, but using `lane` as the lane type. + fn replace_lanes(self, lane: Self) -> Self { + debug_assert!(lane.is_lane() && !self.is_special()); + Self((lane.0 & 0x0f) | (self.0 & 0xf0)) + } + + /// Get a type with the same number of lanes as this type, but with the lanes replaced by + /// booleans of the same size. + /// + /// Lane types are treated as vectors with one lane, so they are converted to the multi-bit + /// boolean types. + pub fn as_truthy_pedantic(self) -> Self { + // Replace the low 4 bits with the boolean version, preserve the high 4 bits. + self.replace_lanes(match self.lane_type() { + I8 => I8, + I16 | F16 => I16, + I32 | F32 => I32, + I64 | F64 => I64, + I128 | F128 => I128, + _ => I8, + }) + } + + /// Get the type of a comparison result for the given type. For vectors this will be a vector + /// with the same number of lanes and integer elements, and for scalar types this will be `i8`, + /// which is the result type of comparisons. + pub fn as_truthy(self) -> Self { + if !self.is_vector() { + I8 + } else { + self.as_truthy_pedantic() + } + } + + /// Get a type with the same number of lanes as this type, but with the lanes replaced by + /// integers of the same size. + pub fn as_int(self) -> Self { + self.replace_lanes(match self.lane_type() { + I8 => I8, + I16 | F16 => I16, + I32 | F32 => I32, + I64 | F64 => I64, + I128 | F128 => I128, + _ => unimplemented!(), + }) + } + + /// Get a type with the same number of lanes as this type, but with lanes that are half the + /// number of bits. + pub fn half_width(self) -> Option { + Some(self.replace_lanes(match self.lane_type() { + I16 => I8, + I32 => I16, + I64 => I32, + I128 => I64, + F32 => F16, + F64 => F32, + F128 => F64, + _ => return None, + })) + } + + /// Get a type with the same number of lanes as this type, but with lanes that are twice the + /// number of bits. + pub fn double_width(self) -> Option { + Some(self.replace_lanes(match self.lane_type() { + I8 => I16, + I16 => I32, + I32 => I64, + I64 => I128, + F16 => F32, + F32 => F64, + F64 => F128, + _ => return None, + })) + } + + /// Is this the INVALID type? + pub fn is_invalid(self) -> bool { + self == INVALID + } + + /// Is this a special type? + pub fn is_special(self) -> bool { + self.0 < constants::LANE_BASE + } + + /// Is this a lane type? + /// + /// This is a scalar type that can also appear as the lane type of a SIMD vector. + pub fn is_lane(self) -> bool { + constants::LANE_BASE <= self.0 && self.0 < constants::VECTOR_BASE + } + + /// Is this a SIMD vector type? + /// + /// A vector type has 2 or more lanes. + pub fn is_vector(self) -> bool { + self.0 >= constants::VECTOR_BASE && !self.is_dynamic_vector() + } + + /// Is this a SIMD vector type with a runtime number of lanes? + pub fn is_dynamic_vector(self) -> bool { + self.0 >= constants::DYNAMIC_VECTOR_BASE + } + + /// Is this a scalar integer type? + pub fn is_int(self) -> bool { + match self { + I8 | I16 | I32 | I64 | I128 => true, + _ => false, + } + } + + /// Is this a scalar floating point type? 
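+    ///
+    /// For example (added, illustrative): this predicate is about scalars only.
+    ///
+    /// ```ignore
+    /// assert!(F64.is_float());
+    /// assert!(!F32X4.is_float());              // a vector is not a scalar float
+    /// assert!(F32X4.lane_type().is_float());   // but its lane type is
+    /// ```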
+ pub fn is_float(self) -> bool { + match self { + F16 | F32 | F64 | F128 => true, + _ => false, + } + } + + /// Get log_2 of the number of lanes in this SIMD vector type. + /// + /// All SIMD types have a lane count that is a power of two and no larger than 256, so this + /// will be a number in the range 0-8. + /// + /// A scalar type is the same as a SIMD vector type with one lane, so it returns 0. + pub fn log2_lane_count(self) -> u32 { + if self.is_dynamic_vector() { + 0 + } else { + (self.0.saturating_sub(constants::LANE_BASE) >> 4) as u32 + } + } + + /// Get log_2 of the number of lanes in this vector/dynamic type. + pub fn log2_min_lane_count(self) -> u32 { + if self.is_dynamic_vector() { + (self + .0 + .saturating_sub(constants::VECTOR_BASE + constants::LANE_BASE) + >> 4) as u32 + } else { + self.log2_lane_count() + } + } + + /// Get the number of lanes in this SIMD vector type. + /// + /// A scalar type is the same as a SIMD vector type with one lane, so it returns 1. + pub fn lane_count(self) -> u32 { + if self.is_dynamic_vector() { + 0 + } else { + 1 << self.log2_lane_count() + } + } + + /// Get the total number of bits used to represent this type. + pub fn bits(self) -> u32 { + if self.is_dynamic_vector() { + 0 + } else { + self.lane_bits() * self.lane_count() + } + } + + /// Get the minimum of lanes in this SIMD vector type, this supports both fixed and + /// dynamic types. + pub fn min_lane_count(self) -> u32 { + if self.is_dynamic_vector() { + 1 << self.log2_min_lane_count() + } else { + 1 << self.log2_lane_count() + } + } + + /// Get the minimum number of bits used to represent this type. + pub fn min_bits(self) -> u32 { + if self.is_dynamic_vector() { + self.lane_bits() * self.min_lane_count() + } else { + self.bits() + } + } + + /// Get the number of bytes used to store this type in memory. + pub fn bytes(self) -> u32 { + (self.bits() + 7) / 8 + } + + /// Get a SIMD vector type with `n` times more lanes than this one. + /// + /// If this is a scalar type, this produces a SIMD type with this as a lane type and `n` lanes. + /// + /// If this is already a SIMD vector type, this produces a SIMD vector type with `n * + /// self.lane_count()` lanes. + pub fn by(self, n: u32) -> Option { + if self.is_dynamic_vector() { + return None; + } + if self.lane_bits() == 0 || !n.is_power_of_two() { + return None; + } + let log2_lanes: u32 = n.trailing_zeros(); + let new_type = u32::from(self.0) + (log2_lanes << 4); + if new_type < constants::DYNAMIC_VECTOR_BASE as u32 + && (new_type as u16) < constants::DYNAMIC_VECTOR_BASE + { + Some(Self(new_type as u16)) + } else { + None + } + } + + /// Convert a fixed vector type to a dynamic one. + pub fn vector_to_dynamic(self) -> Option { + assert!(self.is_vector()); + if self.bits() > 256 { + return None; + } + let new_ty = self.0 + constants::VECTOR_BASE; + let ty = Some(Self(new_ty)); + assert!(ty.unwrap().is_dynamic_vector()); + return ty; + } + + /// Convert a dynamic vector type to a fixed one. + pub fn dynamic_to_vector(self) -> Option { + assert!(self.is_dynamic_vector()); + Some(Self(self.0 - constants::VECTOR_BASE)) + } + + /// Split the lane width in half and double the number of lanes to maintain the same bit-width. + /// + /// If this is a scalar type of `n` bits, it produces a SIMD vector type of `(n/2)x2`. 
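+    ///
+    /// For example (added, illustrative):
+    ///
+    /// ```ignore
+    /// assert_eq!(I64.split_lanes(), Some(I32X2));   // scalar: n bits -> (n/2) x 2
+    /// assert_eq!(I32X4.split_lanes(), Some(I16X8)); // vector: same total bit-width
+    /// assert_eq!(I8.split_lanes(), None);           // i8 has no half-width type
+    /// ```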
+ pub fn split_lanes(self) -> Option { + match self.half_width() { + Some(half_width) => half_width.by(2), + None => None, + } + } + + /// Merge lanes to half the number of lanes and double the lane width to maintain the same + /// bit-width. + /// + /// If this is a scalar type, it will return `None`. + pub fn merge_lanes(self) -> Option { + match self.double_width() { + Some(double_width) => { + if double_width.is_vector() && !double_width.is_dynamic_vector() { + Some(Self(double_width.0 - 0x10)) + } else { + None + } + } + None => None, + } + } + + /// Index of this type, for use with hash tables etc. + pub fn index(self) -> usize { + usize::from(self.0) + } + + /// True iff: + /// + /// 1. `self.lane_count() == other.lane_count()` and + /// 2. `self.lane_bits() >= other.lane_bits()` + pub fn wider_or_equal(self, other: Self) -> bool { + self.lane_count() == other.lane_count() && self.lane_bits() >= other.lane_bits() + } + + /// Return the pointer type for the given target triple. + pub fn triple_pointer_type(triple: &Triple) -> Self { + match triple.pointer_width() { + Ok(PointerWidth::U16) => I16, + Ok(PointerWidth::U32) => I32, + Ok(PointerWidth::U64) => I64, + Err(()) => panic!("unable to determine architecture pointer width"), + } + } + + /// Gets a bit-level representation of the type. Used only + /// internally for efficiently storing types. + pub(crate) fn repr(self) -> u16 { + self.0 + } + + /// Converts from a bit-level representation of the type back to a + /// `Type`. + pub(crate) fn from_repr(bits: u16) -> Type { + Type(bits) + } +} + +impl Display for Type { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + if self.is_int() { + write!(f, "i{}", self.lane_bits()) + } else if self.is_float() { + write!(f, "f{}", self.lane_bits()) + } else if self.is_vector() { + write!(f, "{}x{}", self.lane_type(), self.lane_count()) + } else if self.is_dynamic_vector() { + write!(f, "{:?}x{}xN", self.lane_type(), self.min_lane_count()) + } else { + match *self { + INVALID => panic!("INVALID encountered"), + _ => panic!("Unknown Type(0x{:x})", self.0), + } + } + } +} + +impl Debug for Type { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + if self.is_int() { + write!(f, "types::I{}", self.lane_bits()) + } else if self.is_float() { + write!(f, "types::F{}", self.lane_bits()) + } else if self.is_vector() { + write!(f, "{:?}X{}", self.lane_type(), self.lane_count()) + } else if self.is_dynamic_vector() { + write!(f, "{:?}X{}XN", self.lane_type(), self.min_lane_count()) + } else { + match *self { + INVALID => write!(f, "types::INVALID"), + _ => write!(f, "Type(0x{:x})", self.0), + } + } + } +} + +impl Default for Type { + fn default() -> Self { + INVALID + } +} + +#[cfg(test)] +mod tests { + use super::*; + use alloc::string::ToString; + + #[test] + fn basic_scalars() { + assert_eq!(INVALID, INVALID.lane_type()); + assert_eq!(0, INVALID.bits()); + assert_eq!(I8, I8.lane_type()); + assert_eq!(I16, I16.lane_type()); + assert_eq!(I32, I32.lane_type()); + assert_eq!(I64, I64.lane_type()); + assert_eq!(I128, I128.lane_type()); + assert_eq!(F32, F32.lane_type()); + assert_eq!(F16, F16.lane_type()); + assert_eq!(F64, F64.lane_type()); + assert_eq!(F128, F128.lane_type()); + assert_eq!(I32, I32X4.lane_type()); + assert_eq!(F64, F64X2.lane_type()); + + assert_eq!(INVALID.lane_bits(), 0); + assert_eq!(I8.lane_bits(), 8); + assert_eq!(I16.lane_bits(), 16); + assert_eq!(I32.lane_bits(), 32); + assert_eq!(I64.lane_bits(), 64); + assert_eq!(I128.lane_bits(), 128); + assert_eq!(F16.lane_bits(), 16); + 
assert_eq!(F32.lane_bits(), 32); + assert_eq!(F64.lane_bits(), 64); + assert_eq!(F128.lane_bits(), 128); + } + + #[test] + fn typevar_functions() { + assert_eq!(INVALID.half_width(), None); + assert_eq!(INVALID.half_width(), None); + assert_eq!(I8.half_width(), None); + assert_eq!(I16.half_width(), Some(I8)); + assert_eq!(I32.half_width(), Some(I16)); + assert_eq!(I32X4.half_width(), Some(I16X4)); + assert_eq!(I64.half_width(), Some(I32)); + assert_eq!(I128.half_width(), Some(I64)); + assert_eq!(F16.half_width(), None); + assert_eq!(F32.half_width(), Some(F16)); + assert_eq!(F64.half_width(), Some(F32)); + assert_eq!(F128.half_width(), Some(F64)); + + assert_eq!(INVALID.double_width(), None); + assert_eq!(I8.double_width(), Some(I16)); + assert_eq!(I16.double_width(), Some(I32)); + assert_eq!(I32.double_width(), Some(I64)); + assert_eq!(I32X4.double_width(), Some(I64X4)); + assert_eq!(I64.double_width(), Some(I128)); + assert_eq!(I128.double_width(), None); + assert_eq!(F16.double_width(), Some(F32)); + assert_eq!(F32.double_width(), Some(F64)); + assert_eq!(F64.double_width(), Some(F128)); + assert_eq!(F128.double_width(), None); + } + + #[test] + fn vectors() { + let big = F64.by(256).unwrap(); + assert_eq!(big.lane_bits(), 64); + assert_eq!(big.lane_count(), 256); + assert_eq!(big.bits(), 64 * 256); + + // Check that the generated constants match the computed vector types. + assert_eq!(I32.by(4), Some(I32X4)); + assert_eq!(F64.by(8), Some(F64X8)); + } + + #[test] + fn dynamic_vectors() { + // Identification. + assert_eq!(I8X16XN.is_dynamic_vector(), true); + assert_eq!(F32X8XN.is_dynamic_vector(), true); + assert_eq!(F64X4XN.is_dynamic_vector(), true); + assert_eq!(I128X2XN.is_dynamic_vector(), true); + + // Lane counts. + assert_eq!(I16X8XN.lane_count(), 0); + assert_eq!(I16X8XN.min_lane_count(), 8); + + // Change lane counts + assert_eq!(I8X8XN.by(2), None); + + // Conversions to and from vectors. 
+ assert_eq!(I8.by(16).unwrap().vector_to_dynamic(), Some(I8X16XN)); + assert_eq!(I16.by(8).unwrap().vector_to_dynamic(), Some(I16X8XN)); + assert_eq!(F16.by(8).unwrap().vector_to_dynamic(), Some(F16X8XN)); + assert_eq!(I32.by(4).unwrap().vector_to_dynamic(), Some(I32X4XN)); + assert_eq!(F32.by(4).unwrap().vector_to_dynamic(), Some(F32X4XN)); + assert_eq!(F64.by(2).unwrap().vector_to_dynamic(), Some(F64X2XN)); + assert_eq!(I128.by(2).unwrap().vector_to_dynamic(), Some(I128X2XN)); + assert_eq!(F128.by(2).unwrap().vector_to_dynamic(), Some(F128X2XN)); + + assert_eq!(I128X2XN.dynamic_to_vector(), Some(I128X2)); + assert_eq!(F16X4XN.dynamic_to_vector(), Some(F16X4)); + assert_eq!(F32X4XN.dynamic_to_vector(), Some(F32X4)); + assert_eq!(F64X4XN.dynamic_to_vector(), Some(F64X4)); + assert_eq!(F128X4XN.dynamic_to_vector(), Some(F128X4)); + assert_eq!(I32X2XN.dynamic_to_vector(), Some(I32X2)); + assert_eq!(I32X8XN.dynamic_to_vector(), Some(I32X8)); + assert_eq!(I16X16XN.dynamic_to_vector(), Some(I16X16)); + assert_eq!(I8X32XN.dynamic_to_vector(), Some(I8X32)); + + assert_eq!(I8X64.vector_to_dynamic(), None); + assert_eq!(F32X16.vector_to_dynamic(), None); + assert_eq!(I64X8.vector_to_dynamic(), None); + assert_eq!(I128X4.vector_to_dynamic(), None); + } + + #[test] + fn format_scalars() { + assert_eq!(I8.to_string(), "i8"); + assert_eq!(I16.to_string(), "i16"); + assert_eq!(I32.to_string(), "i32"); + assert_eq!(I64.to_string(), "i64"); + assert_eq!(I128.to_string(), "i128"); + assert_eq!(F32.to_string(), "f32"); + assert_eq!(F64.to_string(), "f64"); + } + + #[test] + fn format_vectors() { + assert_eq!(I8.by(64).unwrap().to_string(), "i8x64"); + assert_eq!(F64.by(2).unwrap().to_string(), "f64x2"); + assert_eq!(I8.by(3), None); + assert_eq!(I8.by(512), None); + assert_eq!(INVALID.by(4), None); + } + + #[test] + fn as_truthy() { + assert_eq!(I32X4.as_truthy(), I32X4); + assert_eq!(I32.as_truthy(), I8); + assert_eq!(I32X4.as_truthy_pedantic(), I32X4); + assert_eq!(I32.as_truthy_pedantic(), I32); + } + + #[test] + fn int_from_size() { + assert_eq!(Type::int(0), None); + assert_eq!(Type::int(8), Some(I8)); + assert_eq!(Type::int(33), None); + assert_eq!(Type::int(64), Some(I64)); + + assert_eq!(Type::int_with_byte_size(0), None); + assert_eq!(Type::int_with_byte_size(2), Some(I16)); + assert_eq!(Type::int_with_byte_size(6), None); + assert_eq!(Type::int_with_byte_size(16), Some(I128)); + + // Ensure `int_with_byte_size` handles overflow properly + let evil = 0xE001_u16; + assert_eq!(evil.wrapping_mul(8), 8, "check the constant is correct"); + assert_eq!(Type::int_with_byte_size(evil), None); + } +} diff --git a/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/user_stack_maps.rs b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/user_stack_maps.rs new file mode 100644 index 000000000..6ad9b697c --- /dev/null +++ b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/ir/user_stack_maps.rs @@ -0,0 +1,199 @@ +//! User-defined stack maps. +//! +//! This module provides types allowing users to define stack maps and associate +//! them with safepoints. +//! +//! A **safepoint** is a program point (i.e. CLIF instruction) where it must be +//! safe to run GC. Currently all non-tail call instructions are considered +//! safepoints. (This does *not* allow, for example, skipping safepoints for +//! calls that are statically known not to trigger collections, or to have a +//! safepoint on a volatile load to a page that gets protected when it is time +//! 
to GC, triggering a fault that pauses the mutator and lets the collector do +//! its work before resuming the mutator. We can lift this restriction in the +//! future, if necessary.) +//! +//! A **stack map** is a description of where to find all the GC-managed values +//! that are live at a particular safepoint. Stack maps let the collector find +//! on-stack roots. Each stack map is logically a set of offsets into the stack +//! frame and the type of value at that associated offset. However, because the +//! stack layout isn't defined until much later in the compiler's pipeline, each +//! stack map entry instead includes both an `ir::StackSlot` and an offset +//! within that slot. +//! +//! These stack maps are **user-defined** in that it is the CLIF producer's +//! responsibility to identify and spill the live GC-managed values and attach +//! the associated stack map entries to each safepoint themselves (see +//! `cranelift_frontend::Function::declare_needs_stack_map` and +//! `cranelift_codegen::ir::DataFlowGraph::append_user_stack_map_entry`). Cranelift +//! will not insert spills and record these stack map entries automatically. +//! +//! Logically, a set of stack maps for a function record a table of the form: +//! +//! ```text +//! +---------------------+-------------------------------------------+ +//! | Instruction Pointer | SP-Relative Offsets of Live GC References | +//! +---------------------+-------------------------------------------+ +//! | 0x12345678 | 2, 6, 12 | +//! | 0x1234abcd | 2, 6 | +//! | ... | ... | +//! +---------------------+-------------------------------------------+ +//! ``` +//! +//! Where "instruction pointer" is an instruction pointer within the function, +//! and "offsets of live GC references" contains the offsets (in units of words) +//! from the frame's stack pointer where live GC references are stored on the +//! stack. Instruction pointers within the function that do not have an entry in +//! this table are not GC safepoints. +//! +//! Because +//! +//! * offsets of live GC references are relative from the stack pointer, and +//! * stack frames grow down from higher addresses to lower addresses, +//! +//! to get a pointer to a live reference at offset `x` within a stack frame, you +//! add `x` to the frame's stack pointer. +//! +//! For example, to calculate the pointer to the live GC reference inside "frame +//! 1" below, you would do `frame_1_sp + x`: +//! +//! ```text +//! Stack +//! +-------------------+ +//! | Frame 0 | +//! | | +//! | | | +//! | +-------------------+ <--- Frame 0's SP +//! | | Frame 1 | +//! Grows | | +//! down | | +//! | | Live GC reference | --+-- +//! | | | | +//! | | | | +//! V | | x = offset of live GC reference +//! | | | +//! | | | +//! +-------------------+ --+-- <--- Frame 1's SP +//! | Frame 2 | +//! | ... | +//! ``` +//! +//! An individual `UserStackMap` is associated with just one instruction pointer +//! within the function, contains the size of the stack frame, and represents +//! the stack frame as a bitmap. There is one bit per word in the stack frame, +//! and if the bit is set, then the word contains a live GC reference. +//! +//! Note that a caller's outgoing argument stack slots (if any) and callee's +//! incoming argument stack slots (if any) overlap, so we must choose which +//! function's stack maps record live GC references in these slots. We record +//! the incoming arguments in the callee's stack map. This choice plays nice +//! 
with tail calls, where by the time we transfer control to the callee, the +//! caller no longer exists. + +use crate::ir; +use cranelift_bitset::CompoundBitSet; +use cranelift_entity::PrimaryMap; +use smallvec::SmallVec; + +pub(crate) type UserStackMapEntryVec = SmallVec<[UserStackMapEntry; 4]>; + +/// A stack map entry describes a single GC-managed value and its location on +/// the stack. +/// +/// A stack map entry is associated with a particular instruction, and that +/// instruction must be a safepoint. The GC-managed value must be stored in the +/// described location across this entry's instruction. +#[derive(Clone, Debug, PartialEq, Hash)] +#[cfg_attr( + feature = "enable-serde", + derive(serde_derive::Serialize, serde_derive::Deserialize) +)] +pub struct UserStackMapEntry { + /// The type of the value stored in this stack map entry. + pub ty: ir::Type, + + /// The stack slot that this stack map entry is within. + pub slot: ir::StackSlot, + + /// The offset within the stack slot where this entry's value can be found. + pub offset: u32, +} + +/// A compiled stack map, describing the location of many GC-managed values. +/// +/// A stack map is associated with a particular instruction, and that +/// instruction is a safepoint. +#[derive(Clone, Debug, PartialEq)] +#[cfg_attr( + feature = "enable-serde", + derive(serde_derive::Deserialize, serde_derive::Serialize) +)] +pub struct UserStackMap { + // Offsets into the frame's sized stack slots that are GC references, by type. + by_type: SmallVec<[(ir::Type, CompoundBitSet); 1]>, + + // The offset of the sized stack slots, from SP, for this stack map's + // associated PC. + // + // This is initially `None` upon construction during lowering, but filled in + // after regalloc during emission when we have the precise frame layout. + sp_to_sized_stack_slots: Option, +} + +impl UserStackMap { + /// Coalesce the given entries into a new `UserStackMap`. + pub(crate) fn new( + entries: &[UserStackMapEntry], + stack_slot_offsets: &PrimaryMap, + ) -> Self { + let mut by_type = SmallVec::<[(ir::Type, CompoundBitSet); 1]>::default(); + + for entry in entries { + let offset = stack_slot_offsets[entry.slot] + entry.offset; + let offset = usize::try_from(offset).unwrap(); + + // Don't bother trying to avoid an `O(n)` search here: `n` is + // basically always one in practice; even if it isn't, there aren't + // that many different CLIF types. + let index = by_type + .iter() + .position(|(ty, _)| *ty == entry.ty) + .unwrap_or_else(|| { + by_type.push((entry.ty, CompoundBitSet::with_capacity(offset + 1))); + by_type.len() - 1 + }); + + by_type[index].1.insert(offset); + } + + UserStackMap { + by_type, + sp_to_sized_stack_slots: None, + } + } + + /// Finalize this stack map by filling in the SP-to-stack-slots offset. + pub(crate) fn finalize(&mut self, sp_to_sized_stack_slots: u32) { + debug_assert!(self.sp_to_sized_stack_slots.is_none()); + self.sp_to_sized_stack_slots = Some(sp_to_sized_stack_slots); + } + + /// Iterate over the entries in this stack map. + /// + /// Yields pairs of the type of GC reference that is at the offset, and the + /// offset from SP. If a pair `(i64, 0x42)` is yielded, for example, then + /// when execution is at this stack map's associated PC, `SP + 0x42` is a + /// pointer to an `i64`, and that `i64` is a live GC reference. 
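+    ///
+    /// Illustrative usage (added commentary; `stack_map` is an assumed binding):
+    ///
+    /// ```ignore
+    /// for (ty, sp_offset) in stack_map.entries() {
+    ///     // At this safepoint, `SP + sp_offset` points to a live GC reference of type `ty`.
+    /// }
+    /// ```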
+ pub fn entries(&self) -> impl Iterator + '_ { + let sp_to_sized_stack_slots = self.sp_to_sized_stack_slots.expect( + "`sp_to_sized_stack_slots` should have been filled in before this stack map was used", + ); + self.by_type.iter().flat_map(move |(ty, bitset)| { + bitset.iter().map(move |slot_offset| { + ( + *ty, + sp_to_sized_stack_slots + u32::try_from(slot_offset).unwrap(), + ) + }) + }) + } +} diff --git a/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/isa/aarch64/abi.rs b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/isa/aarch64/abi.rs new file mode 100644 index 000000000..9ade3ebea --- /dev/null +++ b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/isa/aarch64/abi.rs @@ -0,0 +1,1614 @@ +//! Implementation of a standard AArch64 ABI. + +use crate::ir; +use crate::ir::types; +use crate::ir::types::*; +use crate::ir::MemFlags; +use crate::ir::{dynamic_to_fixed, ExternalName, LibCall, Signature}; +use crate::isa; +use crate::isa::aarch64::{inst::*, settings as aarch64_settings, AArch64Backend}; +use crate::isa::unwind::UnwindInst; +use crate::isa::winch; +use crate::machinst::*; +use crate::settings; +use crate::CodegenResult; +use alloc::boxed::Box; +use alloc::vec::Vec; +use regalloc2::{MachineEnv, PReg, PRegSet}; +use smallvec::{smallvec, SmallVec}; +use std::borrow::ToOwned; +use std::sync::OnceLock; + +// We use a generic implementation that factors out AArch64 and x64 ABI commonalities, because +// these ABIs are very similar. + +/// Support for the AArch64 ABI from the callee side (within a function body). +pub(crate) type AArch64Callee = Callee; + +/// Support for the AArch64 ABI from the caller side (at a callsite). +pub(crate) type AArch64CallSite = CallSite; + +impl Into for StackAMode { + fn into(self) -> AMode { + match self { + StackAMode::IncomingArg(off, stack_args_size) => AMode::IncomingArg { + off: i64::from(stack_args_size) - off, + }, + StackAMode::Slot(off) => AMode::SlotOffset { off }, + StackAMode::OutgoingArg(off) => AMode::SPOffset { off }, + } + } +} + +// Returns the size of stack space needed to store the +// `clobbered_callee_saved` registers. +fn compute_clobber_size(clobbered_callee_saves: &[Writable]) -> u32 { + let mut int_regs = 0; + let mut vec_regs = 0; + for ® in clobbered_callee_saves { + match reg.to_reg().class() { + RegClass::Int => { + int_regs += 1; + } + RegClass::Float => { + vec_regs += 1; + } + RegClass::Vector => unreachable!(), + } + } + + // Round up to multiple of 2, to keep 16-byte stack alignment. + let int_save_bytes = (int_regs + (int_regs & 1)) * 8; + // The Procedure Call Standard for the Arm 64-bit Architecture + // (AAPCS64, including several related ABIs such as the one used by + // Windows) mandates saving only the bottom 8 bytes of the vector + // registers, so we round up the number of registers to ensure + // proper stack alignment (similarly to the situation with + // `int_reg`). + let vec_reg_size = 8; + let vec_save_padding = vec_regs & 1; + // FIXME: SVE: ABI is different to Neon, so do we treat all vec regs as Z-regs? + let vec_save_bytes = (vec_regs + vec_save_padding) * vec_reg_size; + + int_save_bytes + vec_save_bytes +} + +/// AArch64-specific ABI behavior. This struct just serves as an implementation +/// point for the trait; it is never actually instantiated. 
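+///
+/// Illustrative note (added commentary): the `ABIMachineSpec` implementation
+/// below answers target-wide queries, e.g. the machine word is 64 bits and
+/// every calling convention requires 16-byte stack alignment.
+///
+/// ```ignore
+/// assert_eq!(AArch64MachineDeps::word_bits(), 64);
+/// assert_eq!(AArch64MachineDeps::stack_align(isa::CallConv::SystemV), 16);
+/// ```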
+pub struct AArch64MachineDeps; + +impl IsaFlags for aarch64_settings::Flags { + fn is_forward_edge_cfi_enabled(&self) -> bool { + self.use_bti() + } +} + +impl ABIMachineSpec for AArch64MachineDeps { + type I = Inst; + + type F = aarch64_settings::Flags; + + /// This is the limit for the size of argument and return-value areas on the + /// stack. We place a reasonable limit here to avoid integer overflow issues + /// with 32-bit arithmetic: for now, 128 MB. + const STACK_ARG_RET_SIZE_LIMIT: u32 = 128 * 1024 * 1024; + + fn word_bits() -> u32 { + 64 + } + + /// Return required stack alignment in bytes. + fn stack_align(_call_conv: isa::CallConv) -> u32 { + 16 + } + + fn compute_arg_locs( + call_conv: isa::CallConv, + flags: &settings::Flags, + params: &[ir::AbiParam], + args_or_rets: ArgsOrRets, + add_ret_area_ptr: bool, + mut args: ArgsAccumulator, + ) -> CodegenResult<(u32, Option)> { + let is_apple_cc = call_conv == isa::CallConv::AppleAarch64; + let is_winch_return = call_conv == isa::CallConv::Winch && args_or_rets == ArgsOrRets::Rets; + + // See AArch64 ABI (https://github.com/ARM-software/abi-aa/blob/2021Q1/aapcs64/aapcs64.rst#64parameter-passing), sections 6.4. + // + // MacOS aarch64 is slightly different, see also + // https://developer.apple.com/documentation/xcode/writing_arm64_code_for_apple_platforms. + // We are diverging from the MacOS aarch64 implementation in the + // following ways: + // - sign- and zero- extensions of data types less than 32 bits are not + // implemented yet. + // - we align the arguments stack space to a 16-bytes boundary, while + // the MacOS allows aligning only on 8 bytes. In practice it means we're + // slightly overallocating when calling, which is fine, and doesn't + // break our other invariants that the stack is always allocated in + // 16-bytes chunks. + + let mut next_xreg = if call_conv == isa::CallConv::Tail { + // We reserve `x0` for the return area pointer. For simplicity, we + // reserve it even when there is no return area pointer needed. This + // also means that identity functions don't have to shuffle arguments to + // different return registers because we shifted all argument register + // numbers down by one to make space for the return area pointer. + // + // Also, we cannot use all allocatable GPRs as arguments because we need + // at least one allocatable register for holding the callee address in + // indirect calls. So skip `x1` also, reserving it for that role. + 2 + } else { + 0 + }; + let mut next_vreg = 0; + let mut next_stack: u32 = 0; + + // Note on return values: on the regular ABI, we may return values + // in 8 registers for V128 and I64 registers independently of the + // number of register values returned in the other class. That is, + // we can return values in up to 8 integer and + // 8 vector registers at once. + let max_per_class_reg_vals = 8; // x0-x7 and v0-v7 + let mut remaining_reg_vals = 16; + + let ret_area_ptr = if add_ret_area_ptr { + debug_assert_eq!(args_or_rets, ArgsOrRets::Args); + if call_conv != isa::CallConv::Winch { + // In the AAPCS64 calling convention the return area pointer is + // stored in x8. + Some(ABIArg::reg( + xreg(8).to_real_reg().unwrap(), + I64, + ir::ArgumentExtension::None, + ir::ArgumentPurpose::Normal, + )) + } else { + // Use x0 for the return area pointer in the Winch calling convention + // to simplify the ABI handling code in Winch by avoiding an AArch64 + // special case to assign it to x8. 
+ next_xreg += 1; + Some(ABIArg::reg( + xreg(0).to_real_reg().unwrap(), + I64, + ir::ArgumentExtension::None, + ir::ArgumentPurpose::Normal, + )) + } + } else { + None + }; + + for (i, param) in params.into_iter().enumerate() { + if is_apple_cc && param.value_type == types::F128 && !flags.enable_llvm_abi_extensions() + { + panic!( + "f128 args/return values not supported for apple_aarch64 unless LLVM ABI extensions are enabled" + ); + } + + let (rcs, reg_types) = Inst::rc_for_type(param.value_type)?; + + if let ir::ArgumentPurpose::StructReturn = param.purpose { + assert!( + call_conv != isa::CallConv::Tail, + "support for StructReturn parameters is not implemented for the `tail` \ + calling convention yet", + ); + } + + if let ir::ArgumentPurpose::StructArgument(_) = param.purpose { + panic!( + "StructArgument parameters are not supported on arm64. \ + Use regular pointer arguments instead." + ); + } + + if let ir::ArgumentPurpose::StructReturn = param.purpose { + // FIXME add assert_eq!(args_or_rets, ArgsOrRets::Args); once + // ensure_struct_return_ptr_is_returned is gone. + assert!( + param.value_type == types::I64, + "StructReturn must be a pointer sized integer" + ); + args.push(ABIArg::Slots { + slots: smallvec![ABIArgSlot::Reg { + reg: xreg(8).to_real_reg().unwrap(), + ty: types::I64, + extension: param.extension, + },], + purpose: ir::ArgumentPurpose::StructReturn, + }); + continue; + } + + // Handle multi register params + // + // See AArch64 ABI (https://github.com/ARM-software/abi-aa/blob/2021Q1/aapcs64/aapcs64.rst#642parameter-passing-rules), (Section 6.4.2 Stage C). + // + // For arguments with alignment of 16 we round up the register number + // to the next even value. So we can never allocate for example an i128 + // to X1 and X2, we have to skip one register and do X2, X3 + // (Stage C.8) + // Note: The Apple ABI deviates a bit here. They don't respect Stage C.8 + // and will happily allocate a i128 to X1 and X2 + // + // For integer types with alignment of 16 we also have the additional + // restriction of passing the lower half in Xn and the upper half in Xn+1 + // (Stage C.9) + // + // For examples of how LLVM handles this: https://godbolt.org/z/bhd3vvEfh + // + // On the Apple ABI it is unspecified if we can spill half the value into the stack + // i.e load the lower half into x7 and the upper half into the stack + // LLVM does not seem to do this, so we are going to replicate that behaviour + let is_multi_reg = rcs.len() >= 2; + if is_multi_reg { + assert!( + rcs.len() == 2, + "Unable to handle multi reg params with more than 2 regs" + ); + assert!( + rcs == &[RegClass::Int, RegClass::Int], + "Unable to handle non i64 regs" + ); + + let reg_class_space = max_per_class_reg_vals - next_xreg; + let reg_space = remaining_reg_vals; + + if reg_space >= 2 && reg_class_space >= 2 { + // The aarch64 ABI does not allow us to start a split argument + // at an odd numbered register. 
So we need to skip one register + // + // TODO: The Fast ABI should probably not skip the register + if !is_apple_cc && next_xreg % 2 != 0 { + next_xreg += 1; + } + + let lower_reg = xreg(next_xreg); + let upper_reg = xreg(next_xreg + 1); + + args.push(ABIArg::Slots { + slots: smallvec![ + ABIArgSlot::Reg { + reg: lower_reg.to_real_reg().unwrap(), + ty: reg_types[0], + extension: param.extension, + }, + ABIArgSlot::Reg { + reg: upper_reg.to_real_reg().unwrap(), + ty: reg_types[1], + extension: param.extension, + }, + ], + purpose: param.purpose, + }); + + next_xreg += 2; + remaining_reg_vals -= 2; + continue; + } + } else { + // Single Register parameters + let rc = rcs[0]; + let next_reg = match rc { + RegClass::Int => &mut next_xreg, + RegClass::Float => &mut next_vreg, + RegClass::Vector => unreachable!(), + }; + + let push_to_reg = if is_winch_return { + // Winch uses the first register to return the last result + i == params.len() - 1 + } else { + // Use max_per_class_reg_vals & remaining_reg_vals otherwise + *next_reg < max_per_class_reg_vals && remaining_reg_vals > 0 + }; + + if push_to_reg { + let reg = match rc { + RegClass::Int => xreg(*next_reg), + RegClass::Float => vreg(*next_reg), + RegClass::Vector => unreachable!(), + }; + // Overlay Z-regs on V-regs for parameter passing. + let ty = if param.value_type.is_dynamic_vector() { + dynamic_to_fixed(param.value_type) + } else { + param.value_type + }; + args.push(ABIArg::reg( + reg.to_real_reg().unwrap(), + ty, + param.extension, + param.purpose, + )); + *next_reg += 1; + remaining_reg_vals -= 1; + continue; + } + } + + // Spill to the stack + + if args_or_rets == ArgsOrRets::Rets && !flags.enable_multi_ret_implicit_sret() { + return Err(crate::CodegenError::Unsupported( + "Too many return values to fit in registers. \ + Use a StructReturn argument instead. (#9510)" + .to_owned(), + )); + } + + // Compute the stack slot's size. + let size = (ty_bits(param.value_type) / 8) as u32; + + let size = if is_apple_cc || is_winch_return { + // MacOS and Winch aarch64 allows stack slots with + // sizes less than 8 bytes. They still need to be + // properly aligned on their natural data alignment, + // though. + size + } else { + // Every arg takes a minimum slot of 8 bytes. (16-byte stack + // alignment happens separately after all args.) + std::cmp::max(size, 8) + }; + + if !is_winch_return { + // Align the stack slot. 
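+            // For example, on the standard AAPCS64 path an i64 followed by an
+            // i128 spilled to the stack get 8- and 16-byte slots: `next_stack`
+            // is aligned to 8 and then to 16 before each slot is assigned
+            // (offsets 0 and 16), and the total is rounded up to a 16-byte
+            // multiple once all arguments have been placed.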
+ debug_assert!(size.is_power_of_two()); + next_stack = align_to(next_stack, size); + } + + let slots = reg_types + .iter() + .copied() + // Build the stack locations from each slot + .scan(next_stack, |next_stack, ty| { + let slot_offset = *next_stack as i64; + *next_stack += (ty_bits(ty) / 8) as u32; + + Some((ty, slot_offset)) + }) + .map(|(ty, offset)| ABIArgSlot::Stack { + offset, + ty, + extension: param.extension, + }) + .collect(); + + args.push(ABIArg::Slots { + slots, + purpose: param.purpose, + }); + + next_stack += size; + } + + let extra_arg = if let Some(ret_area_ptr) = ret_area_ptr { + args.push_non_formal(ret_area_ptr); + Some(args.args().len() - 1) + } else { + None + }; + + if is_winch_return { + winch::reverse_stack(args, next_stack, false); + } + + next_stack = align_to(next_stack, 16); + + Ok((next_stack, extra_arg)) + } + + fn gen_load_stack(mem: StackAMode, into_reg: Writable, ty: Type) -> Inst { + Inst::gen_load(into_reg, mem.into(), ty, MemFlags::trusted()) + } + + fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Inst { + Inst::gen_store(mem.into(), from_reg, ty, MemFlags::trusted()) + } + + fn gen_move(to_reg: Writable, from_reg: Reg, ty: Type) -> Inst { + Inst::gen_move(to_reg, from_reg, ty) + } + + fn gen_extend( + to_reg: Writable, + from_reg: Reg, + signed: bool, + from_bits: u8, + to_bits: u8, + ) -> Inst { + assert!(from_bits < to_bits); + Inst::Extend { + rd: to_reg, + rn: from_reg, + signed, + from_bits, + to_bits, + } + } + + fn gen_args(args: Vec) -> Inst { + Inst::Args { args } + } + + fn gen_rets(rets: Vec) -> Inst { + Inst::Rets { rets } + } + + fn gen_add_imm( + _call_conv: isa::CallConv, + into_reg: Writable, + from_reg: Reg, + imm: u32, + ) -> SmallInstVec { + let imm = imm as u64; + let mut insts = SmallVec::new(); + if let Some(imm12) = Imm12::maybe_from_u64(imm) { + insts.push(Inst::AluRRImm12 { + alu_op: ALUOp::Add, + size: OperandSize::Size64, + rd: into_reg, + rn: from_reg, + imm12, + }); + } else { + let scratch2 = writable_tmp2_reg(); + assert_ne!(scratch2.to_reg(), from_reg); + // `gen_add_imm` is only ever called after register allocation has taken place, and as a + // result it's ok to reuse the scratch2 register here. If that changes, we'll need to + // plumb through a way to allocate temporary virtual registers + insts.extend(Inst::load_constant(scratch2, imm.into(), &mut |_| scratch2)); + insts.push(Inst::AluRRRExtend { + alu_op: ALUOp::Add, + size: OperandSize::Size64, + rd: into_reg, + rn: from_reg, + rm: scratch2.to_reg(), + extendop: ExtendOp::UXTX, + }); + } + insts + } + + fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallInstVec { + let mut insts = SmallVec::new(); + insts.push(Inst::AluRRRExtend { + alu_op: ALUOp::SubS, + size: OperandSize::Size64, + rd: writable_zero_reg(), + rn: stack_reg(), + rm: limit_reg, + extendop: ExtendOp::UXTX, + }); + insts.push(Inst::TrapIf { + trap_code: ir::TrapCode::STACK_OVERFLOW, + // Here `Lo` == "less than" when interpreting the two + // operands as unsigned integers. + kind: CondBrKind::Cond(Cond::Lo), + }); + insts + } + + fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable) -> Inst { + // FIXME: Do something different for dynamic types? 
+ let mem = mem.into(); + Inst::LoadAddr { rd: into_reg, mem } + } + + fn get_stacklimit_reg(_call_conv: isa::CallConv) -> Reg { + spilltmp_reg() + } + + fn gen_load_base_offset(into_reg: Writable, base: Reg, offset: i32, ty: Type) -> Inst { + let mem = AMode::RegOffset { + rn: base, + off: offset as i64, + }; + Inst::gen_load(into_reg, mem, ty, MemFlags::trusted()) + } + + fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Inst { + let mem = AMode::RegOffset { + rn: base, + off: offset as i64, + }; + Inst::gen_store(mem, from_reg, ty, MemFlags::trusted()) + } + + fn gen_sp_reg_adjust(amount: i32) -> SmallInstVec { + if amount == 0 { + return SmallVec::new(); + } + + let (amount, is_sub) = if amount > 0 { + (amount as u64, false) + } else { + (-amount as u64, true) + }; + + let alu_op = if is_sub { ALUOp::Sub } else { ALUOp::Add }; + + let mut ret = SmallVec::new(); + if let Some(imm12) = Imm12::maybe_from_u64(amount) { + let adj_inst = Inst::AluRRImm12 { + alu_op, + size: OperandSize::Size64, + rd: writable_stack_reg(), + rn: stack_reg(), + imm12, + }; + ret.push(adj_inst); + } else { + let tmp = writable_spilltmp_reg(); + // `gen_sp_reg_adjust` is called after regalloc2, so it's acceptable to reuse `tmp` for + // intermediates in `load_constant`. + let const_inst = Inst::load_constant(tmp, amount, &mut |_| tmp); + let adj_inst = Inst::AluRRRExtend { + alu_op, + size: OperandSize::Size64, + rd: writable_stack_reg(), + rn: stack_reg(), + rm: tmp.to_reg(), + extendop: ExtendOp::UXTX, + }; + ret.extend(const_inst); + ret.push(adj_inst); + } + ret + } + + fn gen_prologue_frame_setup( + call_conv: isa::CallConv, + flags: &settings::Flags, + isa_flags: &aarch64_settings::Flags, + frame_layout: &FrameLayout, + ) -> SmallInstVec { + let setup_frame = frame_layout.setup_area_size > 0; + let mut insts = SmallVec::new(); + + match select_api_key(isa_flags, call_conv, setup_frame) { + Some(key) => { + insts.push(Inst::Paci { key }); + if flags.unwind_info() { + insts.push(Inst::Unwind { + inst: UnwindInst::Aarch64SetPointerAuth { + return_addresses: true, + }, + }); + } + } + None => { + if isa_flags.use_bti() { + insts.push(Inst::Bti { + targets: BranchTargetType::C, + }); + } + + if flags.unwind_info() && call_conv == isa::CallConv::AppleAarch64 { + // The macOS unwinder seems to require this. + insts.push(Inst::Unwind { + inst: UnwindInst::Aarch64SetPointerAuth { + return_addresses: false, + }, + }); + } + } + } + + if setup_frame { + // stp fp (x29), lr (x30), [sp, #-16]! + insts.push(Inst::StoreP64 { + rt: fp_reg(), + rt2: link_reg(), + mem: PairAMode::SPPreIndexed { + simm7: SImm7Scaled::maybe_from_i64(-16, types::I64).unwrap(), + }, + flags: MemFlags::trusted(), + }); + + if flags.unwind_info() { + insts.push(Inst::Unwind { + inst: UnwindInst::PushFrameRegs { + offset_upward_to_caller_sp: frame_layout.setup_area_size, + }, + }); + } + + // mov fp (x29), sp. This uses the ADDI rd, rs, 0 form of `MOV` because + // the usual encoding (`ORR`) does not work with SP. 
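+            // (Register number 31 encodes the zero register in `ORR`'s operand
+            // fields but the stack pointer in `ADD (immediate)`, so
+            // `add fp, sp, #0` is the canonical way to copy SP.)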
+ insts.push(Inst::AluRRImm12 { + alu_op: ALUOp::Add, + size: OperandSize::Size64, + rd: writable_fp_reg(), + rn: stack_reg(), + imm12: Imm12 { + bits: 0, + shift12: false, + }, + }); + } + + insts + } + + fn gen_epilogue_frame_restore( + call_conv: isa::CallConv, + _flags: &settings::Flags, + _isa_flags: &aarch64_settings::Flags, + frame_layout: &FrameLayout, + ) -> SmallInstVec { + let setup_frame = frame_layout.setup_area_size > 0; + let mut insts = SmallVec::new(); + + if setup_frame { + // N.B.: sp is already adjusted to the appropriate place by the + // clobber-restore code (which also frees the fixed frame). Hence, there + // is no need for the usual `mov sp, fp` here. + + // `ldp fp, lr, [sp], #16` + insts.push(Inst::LoadP64 { + rt: writable_fp_reg(), + rt2: writable_link_reg(), + mem: PairAMode::SPPostIndexed { + simm7: SImm7Scaled::maybe_from_i64(16, types::I64).unwrap(), + }, + flags: MemFlags::trusted(), + }); + } + + if call_conv == isa::CallConv::Tail && frame_layout.tail_args_size > 0 { + insts.extend(Self::gen_sp_reg_adjust( + frame_layout.tail_args_size.try_into().unwrap(), + )); + } + + insts + } + + fn gen_return( + call_conv: isa::CallConv, + isa_flags: &aarch64_settings::Flags, + frame_layout: &FrameLayout, + ) -> SmallInstVec { + let setup_frame = frame_layout.setup_area_size > 0; + + match select_api_key(isa_flags, call_conv, setup_frame) { + Some(key) => { + smallvec![Inst::AuthenticatedRet { + key, + is_hint: !isa_flags.has_pauth(), + }] + } + None => { + smallvec![Inst::Ret {}] + } + } + } + + fn gen_probestack(_insts: &mut SmallInstVec, _: u32) { + // TODO: implement if we ever require stack probes on an AArch64 host + // (unlikely unless Lucet is ported) + unimplemented!("Stack probing is unimplemented on AArch64"); + } + + fn gen_inline_probestack( + insts: &mut SmallInstVec, + _call_conv: isa::CallConv, + frame_size: u32, + guard_size: u32, + ) { + // The stack probe loop currently takes 6 instructions and each inline + // probe takes 2 (ish, these numbers sort of depend on the constants). + // Set this to 3 to keep the max size of the probe to 6 instructions. + const PROBE_MAX_UNROLL: u32 = 3; + + // Calculate how many probes we need to perform. Round down, as we only + // need to probe whole guard_size regions we'd otherwise skip over. + let probe_count = frame_size / guard_size; + if probe_count == 0 { + // No probe necessary + } else if probe_count <= PROBE_MAX_UNROLL { + Self::gen_probestack_unroll(insts, guard_size, probe_count) + } else { + Self::gen_probestack_loop(insts, frame_size, guard_size) + } + } + + fn gen_clobber_save( + _call_conv: isa::CallConv, + flags: &settings::Flags, + frame_layout: &FrameLayout, + ) -> SmallVec<[Inst; 16]> { + let (clobbered_int, clobbered_vec) = frame_layout.clobbered_callee_saves_by_class(); + + let mut insts = SmallVec::new(); + let setup_frame = frame_layout.setup_area_size > 0; + + // When a return_call within this function required more stack arguments than we have + // present, resize the incoming argument area of the frame to accommodate those arguments. + let incoming_args_diff = frame_layout.tail_args_size - frame_layout.incoming_args_size; + if incoming_args_diff > 0 { + // Decrement SP to account for the additional space required by a tail call. + insts.extend(Self::gen_sp_reg_adjust(-(incoming_args_diff as i32))); + if flags.unwind_info() { + insts.push(Inst::Unwind { + inst: UnwindInst::StackAlloc { + size: incoming_args_diff, + }, + }); + } + + // Move fp and lr down. 
+ if setup_frame { + // Reload the frame pointer from the stack. + insts.push(Inst::ULoad64 { + rd: regs::writable_fp_reg(), + mem: AMode::SPOffset { + off: i64::from(incoming_args_diff), + }, + flags: MemFlags::trusted(), + }); + + // Store the frame pointer and link register again at the new SP + insts.push(Inst::StoreP64 { + rt: fp_reg(), + rt2: link_reg(), + mem: PairAMode::SignedOffset { + reg: regs::stack_reg(), + simm7: SImm7Scaled::maybe_from_i64(0, types::I64).unwrap(), + }, + flags: MemFlags::trusted(), + }); + + // Keep the frame pointer in sync + insts.push(Self::gen_move( + regs::writable_fp_reg(), + regs::stack_reg(), + types::I64, + )); + } + } + + if flags.unwind_info() && setup_frame { + // The *unwind* frame (but not the actual frame) starts at the + // clobbers, just below the saved FP/LR pair. + insts.push(Inst::Unwind { + inst: UnwindInst::DefineNewFrame { + offset_downward_to_clobbers: frame_layout.clobber_size, + offset_upward_to_caller_sp: frame_layout.setup_area_size, + }, + }); + } + + // We use pre-indexed addressing modes here, rather than the possibly + // more efficient "subtract sp once then used fixed offsets" scheme, + // because (i) we cannot necessarily guarantee that the offset of a + // clobber-save slot will be within a SImm7Scaled (+504-byte) offset + // range of the whole frame including other slots, it is more complex to + // conditionally generate a two-stage SP adjustment (clobbers then fixed + // frame) otherwise, and generally we just want to maintain simplicity + // here for maintainability. Because clobbers are at the top of the + // frame, just below FP, all that is necessary is to use the pre-indexed + // "push" `[sp, #-16]!` addressing mode. + // + // `frame_offset` tracks offset above start-of-clobbers for unwind-info + // purposes. + let mut clobber_offset = frame_layout.clobber_size; + let clobber_offset_change = 16; + let iter = clobbered_int.chunks_exact(2); + + if let [rd] = iter.remainder() { + let rd: Reg = rd.to_reg().into(); + + debug_assert_eq!(rd.class(), RegClass::Int); + // str rd, [sp, #-16]! + insts.push(Inst::Store64 { + rd, + mem: AMode::SPPreIndexed { + simm9: SImm9::maybe_from_i64(-clobber_offset_change).unwrap(), + }, + flags: MemFlags::trusted(), + }); + + if flags.unwind_info() { + clobber_offset -= clobber_offset_change as u32; + insts.push(Inst::Unwind { + inst: UnwindInst::SaveReg { + clobber_offset, + reg: rd.to_real_reg().unwrap(), + }, + }); + } + } + + let mut iter = iter.rev(); + + while let Some([rt, rt2]) = iter.next() { + // .to_reg().into(): Writable --> RealReg --> Reg + let rt: Reg = rt.to_reg().into(); + let rt2: Reg = rt2.to_reg().into(); + + debug_assert!(rt.class() == RegClass::Int); + debug_assert!(rt2.class() == RegClass::Int); + + // stp rt, rt2, [sp, #-16]! 
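+            // (The pre-indexed store decrements SP by 16 and writes `rt` at
+            // [sp] and `rt2` at [sp, #8]; the unwind records pushed below
+            // mirror that layout.)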
+ insts.push(Inst::StoreP64 { + rt, + rt2, + mem: PairAMode::SPPreIndexed { + simm7: SImm7Scaled::maybe_from_i64(-clobber_offset_change, types::I64).unwrap(), + }, + flags: MemFlags::trusted(), + }); + + if flags.unwind_info() { + clobber_offset -= clobber_offset_change as u32; + insts.push(Inst::Unwind { + inst: UnwindInst::SaveReg { + clobber_offset, + reg: rt.to_real_reg().unwrap(), + }, + }); + insts.push(Inst::Unwind { + inst: UnwindInst::SaveReg { + clobber_offset: clobber_offset + (clobber_offset_change / 2) as u32, + reg: rt2.to_real_reg().unwrap(), + }, + }); + } + } + + let store_vec_reg = |rd| Inst::FpuStore64 { + rd, + mem: AMode::SPPreIndexed { + simm9: SImm9::maybe_from_i64(-clobber_offset_change).unwrap(), + }, + flags: MemFlags::trusted(), + }; + let iter = clobbered_vec.chunks_exact(2); + + if let [rd] = iter.remainder() { + let rd: Reg = rd.to_reg().into(); + + debug_assert_eq!(rd.class(), RegClass::Float); + insts.push(store_vec_reg(rd)); + + if flags.unwind_info() { + clobber_offset -= clobber_offset_change as u32; + insts.push(Inst::Unwind { + inst: UnwindInst::SaveReg { + clobber_offset, + reg: rd.to_real_reg().unwrap(), + }, + }); + } + } + + let store_vec_reg_pair = |rt, rt2| { + let clobber_offset_change = 16; + + ( + Inst::FpuStoreP64 { + rt, + rt2, + mem: PairAMode::SPPreIndexed { + simm7: SImm7Scaled::maybe_from_i64(-clobber_offset_change, F64).unwrap(), + }, + flags: MemFlags::trusted(), + }, + clobber_offset_change as u32, + ) + }; + let mut iter = iter.rev(); + + while let Some([rt, rt2]) = iter.next() { + let rt: Reg = rt.to_reg().into(); + let rt2: Reg = rt2.to_reg().into(); + + debug_assert_eq!(rt.class(), RegClass::Float); + debug_assert_eq!(rt2.class(), RegClass::Float); + + let (inst, clobber_offset_change) = store_vec_reg_pair(rt, rt2); + + insts.push(inst); + + if flags.unwind_info() { + clobber_offset -= clobber_offset_change; + insts.push(Inst::Unwind { + inst: UnwindInst::SaveReg { + clobber_offset, + reg: rt.to_real_reg().unwrap(), + }, + }); + insts.push(Inst::Unwind { + inst: UnwindInst::SaveReg { + clobber_offset: clobber_offset + clobber_offset_change / 2, + reg: rt2.to_real_reg().unwrap(), + }, + }); + } + } + + // Allocate the fixed frame below the clobbers if necessary. + let stack_size = frame_layout.fixed_frame_storage_size + frame_layout.outgoing_args_size; + if stack_size > 0 { + insts.extend(Self::gen_sp_reg_adjust(-(stack_size as i32))); + if flags.unwind_info() { + insts.push(Inst::Unwind { + inst: UnwindInst::StackAlloc { size: stack_size }, + }); + } + } + + insts + } + + fn gen_clobber_restore( + _call_conv: isa::CallConv, + _flags: &settings::Flags, + frame_layout: &FrameLayout, + ) -> SmallVec<[Inst; 16]> { + let mut insts = SmallVec::new(); + let (clobbered_int, clobbered_vec) = frame_layout.clobbered_callee_saves_by_class(); + + // Free the fixed frame if necessary. 
+ let stack_size = frame_layout.fixed_frame_storage_size + frame_layout.outgoing_args_size; + if stack_size > 0 { + insts.extend(Self::gen_sp_reg_adjust(stack_size as i32)); + } + + let load_vec_reg = |rd| Inst::FpuLoad64 { + rd, + mem: AMode::SPPostIndexed { + simm9: SImm9::maybe_from_i64(16).unwrap(), + }, + flags: MemFlags::trusted(), + }; + let load_vec_reg_pair = |rt, rt2| Inst::FpuLoadP64 { + rt, + rt2, + mem: PairAMode::SPPostIndexed { + simm7: SImm7Scaled::maybe_from_i64(16, F64).unwrap(), + }, + flags: MemFlags::trusted(), + }; + + let mut iter = clobbered_vec.chunks_exact(2); + + while let Some([rt, rt2]) = iter.next() { + let rt: Writable = rt.map(|r| r.into()); + let rt2: Writable = rt2.map(|r| r.into()); + + debug_assert_eq!(rt.to_reg().class(), RegClass::Float); + debug_assert_eq!(rt2.to_reg().class(), RegClass::Float); + insts.push(load_vec_reg_pair(rt, rt2)); + } + + debug_assert!(iter.remainder().len() <= 1); + + if let [rd] = iter.remainder() { + let rd: Writable = rd.map(|r| r.into()); + + debug_assert_eq!(rd.to_reg().class(), RegClass::Float); + insts.push(load_vec_reg(rd)); + } + + let mut iter = clobbered_int.chunks_exact(2); + + while let Some([rt, rt2]) = iter.next() { + let rt: Writable = rt.map(|r| r.into()); + let rt2: Writable = rt2.map(|r| r.into()); + + debug_assert_eq!(rt.to_reg().class(), RegClass::Int); + debug_assert_eq!(rt2.to_reg().class(), RegClass::Int); + // ldp rt, rt2, [sp], #16 + insts.push(Inst::LoadP64 { + rt, + rt2, + mem: PairAMode::SPPostIndexed { + simm7: SImm7Scaled::maybe_from_i64(16, I64).unwrap(), + }, + flags: MemFlags::trusted(), + }); + } + + debug_assert!(iter.remainder().len() <= 1); + + if let [rd] = iter.remainder() { + let rd: Writable = rd.map(|r| r.into()); + + debug_assert_eq!(rd.to_reg().class(), RegClass::Int); + // ldr rd, [sp], #16 + insts.push(Inst::ULoad64 { + rd, + mem: AMode::SPPostIndexed { + simm9: SImm9::maybe_from_i64(16).unwrap(), + }, + flags: MemFlags::trusted(), + }); + } + + insts + } + + fn gen_call(dest: &CallDest, tmp: Writable, info: CallInfo<()>) -> SmallVec<[Inst; 2]> { + let mut insts = SmallVec::new(); + match dest { + CallDest::ExtName(name, RelocDistance::Near) => { + let info = Box::new(info.map(|()| name.clone())); + insts.push(Inst::Call { info }); + } + CallDest::ExtName(name, RelocDistance::Far) => { + insts.push(Inst::LoadExtName { + rd: tmp, + name: Box::new(name.clone()), + offset: 0, + }); + let info = Box::new(info.map(|()| tmp.to_reg())); + insts.push(Inst::CallInd { info }); + } + CallDest::Reg(reg) => { + let info = Box::new(info.map(|()| *reg)); + insts.push(Inst::CallInd { info }); + } + } + + insts + } + + fn gen_memcpy Writable>( + call_conv: isa::CallConv, + dst: Reg, + src: Reg, + size: usize, + mut alloc_tmp: F, + ) -> SmallVec<[Self::I; 8]> { + let mut insts = SmallVec::new(); + let arg0 = writable_xreg(0); + let arg1 = writable_xreg(1); + let arg2 = writable_xreg(2); + let tmp = alloc_tmp(Self::word_type()); + insts.extend(Inst::load_constant(tmp, size as u64, &mut alloc_tmp)); + insts.push(Inst::Call { + info: Box::new(CallInfo { + dest: ExternalName::LibCall(LibCall::Memcpy), + uses: smallvec![ + CallArgPair { + vreg: dst, + preg: arg0.to_reg() + }, + CallArgPair { + vreg: src, + preg: arg1.to_reg() + }, + CallArgPair { + vreg: tmp.to_reg(), + preg: arg2.to_reg() + } + ], + defs: smallvec![], + clobbers: Self::get_regs_clobbered_by_call(call_conv), + caller_conv: call_conv, + callee_conv: call_conv, + callee_pop_size: 0, + }), + }); + insts + } + + fn 
get_number_of_spillslots_for_value( + rc: RegClass, + vector_size: u32, + _isa_flags: &Self::F, + ) -> u32 { + assert_eq!(vector_size % 8, 0); + // We allocate in terms of 8-byte slots. + match rc { + RegClass::Int => 1, + RegClass::Float => vector_size / 8, + RegClass::Vector => unreachable!(), + } + } + + fn get_machine_env(flags: &settings::Flags, _call_conv: isa::CallConv) -> &MachineEnv { + if flags.enable_pinned_reg() { + static MACHINE_ENV: OnceLock = OnceLock::new(); + MACHINE_ENV.get_or_init(|| create_reg_env(true)) + } else { + static MACHINE_ENV: OnceLock = OnceLock::new(); + MACHINE_ENV.get_or_init(|| create_reg_env(false)) + } + } + + fn get_regs_clobbered_by_call(call_conv: isa::CallConv) -> PRegSet { + match call_conv { + isa::CallConv::Winch => WINCH_CLOBBERS, + _ => DEFAULT_AAPCS_CLOBBERS, + } + } + + fn get_ext_mode( + call_conv: isa::CallConv, + specified: ir::ArgumentExtension, + ) -> ir::ArgumentExtension { + if call_conv == isa::CallConv::AppleAarch64 { + specified + } else { + ir::ArgumentExtension::None + } + } + + fn compute_frame_layout( + call_conv: isa::CallConv, + flags: &settings::Flags, + sig: &Signature, + regs: &[Writable], + is_leaf: bool, + incoming_args_size: u32, + tail_args_size: u32, + fixed_frame_storage_size: u32, + outgoing_args_size: u32, + ) -> FrameLayout { + let mut regs: Vec> = regs + .iter() + .cloned() + .filter(|r| { + is_reg_saved_in_prologue(call_conv, flags.enable_pinned_reg(), sig, r.to_reg()) + }) + .collect(); + + // Sort registers for deterministic code output. We can do an unstable + // sort because the registers will be unique (there are no dups). + regs.sort_unstable(); + + // Compute clobber size. + let clobber_size = compute_clobber_size(®s); + + // Compute linkage frame size. + let setup_area_size = if flags.preserve_frame_pointers() + || !is_leaf + // The function arguments that are passed on the stack are addressed + // relative to the Frame Pointer. + || incoming_args_size > 0 + || clobber_size > 0 + || fixed_frame_storage_size > 0 + { + 16 // FP, LR + } else { + 0 + }; + + // Return FrameLayout structure. + FrameLayout { + incoming_args_size, + tail_args_size, + setup_area_size, + clobber_size, + fixed_frame_storage_size, + outgoing_args_size, + clobbered_callee_saves: regs, + } + } +} + +impl AArch64MachineDeps { + fn gen_probestack_unroll(insts: &mut SmallInstVec, guard_size: u32, probe_count: u32) { + // When manually unrolling adjust the stack pointer and then write a zero + // to the stack at that offset. This generates something like + // `sub sp, sp, #1, lsl #12` followed by `stur wzr, [sp]`. + // + // We do this because valgrind expects us to never write beyond the stack + // pointer and associated redzone. + // See: https://github.com/bytecodealliance/wasmtime/issues/7454 + for _ in 0..probe_count { + insts.extend(Self::gen_sp_reg_adjust(-(guard_size as i32))); + + insts.push(Inst::gen_store( + AMode::SPOffset { off: 0 }, + zero_reg(), + I32, + MemFlags::trusted(), + )); + } + + // Restore the stack pointer to its original value + insts.extend(Self::gen_sp_reg_adjust((guard_size * probe_count) as i32)); + } + + fn gen_probestack_loop(insts: &mut SmallInstVec, frame_size: u32, guard_size: u32) { + // The non-unrolled version uses two temporary registers. The + // `start` contains the current offset from sp and counts downwards + // during the loop by increments of `guard_size`. The `end` is + // the size of the frame and where we stop. 
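+        // (For example, with a 4 KiB guard region a 64 KiB frame would need
+        // 16 probes, which is above PROBE_MAX_UNROLL, so this loop form is
+        // used rather than unrolling.)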
+ // + // Note that this emission is all post-regalloc so it should be ok + // to use the temporary registers here as input/output as the loop + // itself is not allowed to use the registers. + let start = writable_spilltmp_reg(); + let end = writable_tmp2_reg(); + // `gen_inline_probestack` is called after regalloc2, so it's acceptable to reuse + // `start` and `end` as temporaries in load_constant. + insts.extend(Inst::load_constant(start, 0, &mut |_| start)); + insts.extend(Inst::load_constant(end, frame_size.into(), &mut |_| end)); + insts.push(Inst::StackProbeLoop { + start, + end: end.to_reg(), + step: Imm12::maybe_from_u64(guard_size.into()).unwrap(), + }); + } +} + +fn select_api_key( + isa_flags: &aarch64_settings::Flags, + call_conv: isa::CallConv, + setup_frame: bool, +) -> Option { + if isa_flags.sign_return_address() && (setup_frame || isa_flags.sign_return_address_all()) { + // The `tail` calling convention uses a zero modifier rather than SP + // because tail calls may happen with a different stack pointer than + // when the function was entered, meaning that it won't be the same when + // the return address is decrypted. + Some(if isa_flags.sign_return_address_with_bkey() { + match call_conv { + isa::CallConv::Tail => APIKey::BZ, + _ => APIKey::BSP, + } + } else { + match call_conv { + isa::CallConv::Tail => APIKey::AZ, + _ => APIKey::ASP, + } + }) + } else { + None + } +} + +impl AArch64CallSite { + pub fn emit_return_call( + mut self, + ctx: &mut Lower, + args: isle::ValueSlice, + backend: &AArch64Backend, + ) { + let new_stack_arg_size = + u32::try_from(self.sig(ctx.sigs()).sized_stack_arg_space()).unwrap(); + + ctx.abi_mut().accumulate_tail_args_size(new_stack_arg_size); + + // Put all arguments in registers and stack slots (within that newly + // allocated stack space). + self.emit_args(ctx, args); + self.emit_stack_ret_arg_for_tail_call(ctx); + + let dest = self.dest().clone(); + let uses = self.take_uses(); + let key = select_api_key(&backend.isa_flags, isa::CallConv::Tail, true); + + match dest { + CallDest::ExtName(callee, RelocDistance::Near) => { + let info = Box::new(ReturnCallInfo { + dest: callee, + uses, + key, + new_stack_arg_size, + }); + ctx.emit(Inst::ReturnCall { info }); + } + CallDest::ExtName(name, RelocDistance::Far) => { + let callee = ctx.alloc_tmp(types::I64).only_reg().unwrap(); + ctx.emit(Inst::LoadExtName { + rd: callee, + name: Box::new(name), + offset: 0, + }); + let info = Box::new(ReturnCallInfo { + dest: callee.to_reg(), + uses, + key, + new_stack_arg_size, + }); + ctx.emit(Inst::ReturnCallInd { info }); + } + CallDest::Reg(callee) => { + let info = Box::new(ReturnCallInfo { + dest: callee, + uses, + key, + new_stack_arg_size, + }); + ctx.emit(Inst::ReturnCallInd { info }); + } + } + } +} + +/// Is the given register saved in the prologue if clobbered, i.e., is it a +/// callee-save? +fn is_reg_saved_in_prologue( + _call_conv: isa::CallConv, + enable_pinned_reg: bool, + sig: &Signature, + r: RealReg, +) -> bool { + // FIXME: We need to inspect whether a function is returning Z or P regs too. + let save_z_regs = sig + .params + .iter() + .filter(|p| p.value_type.is_dynamic_vector()) + .count() + != 0; + + match r.class() { + RegClass::Int => { + // x19 - x28 inclusive are callee-saves. + // However, x21 is the pinned reg if `enable_pinned_reg` + // is set, and is implicitly globally-allocated, hence not + // callee-saved in prologues. 
+ if enable_pinned_reg && r.hw_enc() == PINNED_REG { + false + } else { + r.hw_enc() >= 19 && r.hw_enc() <= 28 + } + } + RegClass::Float => { + // If a subroutine takes at least one argument in scalable vector registers + // or scalable predicate registers, or if it is a function that returns + // results in such registers, it must ensure that the entire contents of + // z8-z23 are preserved across the call. In other cases it need only + // preserve the low 64 bits of z8-z15. + if save_z_regs { + r.hw_enc() >= 8 && r.hw_enc() <= 23 + } else { + // v8 - v15 inclusive are callee-saves. + r.hw_enc() >= 8 && r.hw_enc() <= 15 + } + } + RegClass::Vector => unreachable!(), + } +} + +const fn default_aapcs_clobbers() -> PRegSet { + PRegSet::empty() + // x0 - x17 inclusive are caller-saves. + .with(xreg_preg(0)) + .with(xreg_preg(1)) + .with(xreg_preg(2)) + .with(xreg_preg(3)) + .with(xreg_preg(4)) + .with(xreg_preg(5)) + .with(xreg_preg(6)) + .with(xreg_preg(7)) + .with(xreg_preg(8)) + .with(xreg_preg(9)) + .with(xreg_preg(10)) + .with(xreg_preg(11)) + .with(xreg_preg(12)) + .with(xreg_preg(13)) + .with(xreg_preg(14)) + .with(xreg_preg(15)) + .with(xreg_preg(16)) + .with(xreg_preg(17)) + // v0 - v7 inclusive and v16 - v31 inclusive are + // caller-saves. The upper 64 bits of v8 - v15 inclusive are + // also caller-saves. However, because we cannot currently + // represent partial registers to regalloc2, we indicate here + // that every vector register is caller-save. Because this + // function is used at *callsites*, approximating in this + // direction (save more than necessary) is conservative and + // thus safe. + // + // Note that we exclude clobbers from a call instruction when + // a call instruction's callee has the same ABI as the caller + // (the current function body); this is safe (anything + // clobbered by callee can be clobbered by caller as well) and + // avoids unnecessary saves of v8-v15 in the prologue even + // though we include them as defs here. + .with(vreg_preg(0)) + .with(vreg_preg(1)) + .with(vreg_preg(2)) + .with(vreg_preg(3)) + .with(vreg_preg(4)) + .with(vreg_preg(5)) + .with(vreg_preg(6)) + .with(vreg_preg(7)) + .with(vreg_preg(8)) + .with(vreg_preg(9)) + .with(vreg_preg(10)) + .with(vreg_preg(11)) + .with(vreg_preg(12)) + .with(vreg_preg(13)) + .with(vreg_preg(14)) + .with(vreg_preg(15)) + .with(vreg_preg(16)) + .with(vreg_preg(17)) + .with(vreg_preg(18)) + .with(vreg_preg(19)) + .with(vreg_preg(20)) + .with(vreg_preg(21)) + .with(vreg_preg(22)) + .with(vreg_preg(23)) + .with(vreg_preg(24)) + .with(vreg_preg(25)) + .with(vreg_preg(26)) + .with(vreg_preg(27)) + .with(vreg_preg(28)) + .with(vreg_preg(29)) + .with(vreg_preg(30)) + .with(vreg_preg(31)) +} + +const fn winch_clobbers() -> PRegSet { + PRegSet::empty() + .with(xreg_preg(0)) + .with(xreg_preg(1)) + .with(xreg_preg(2)) + .with(xreg_preg(3)) + .with(xreg_preg(4)) + .with(xreg_preg(5)) + .with(xreg_preg(6)) + .with(xreg_preg(7)) + .with(xreg_preg(8)) + .with(xreg_preg(9)) + .with(xreg_preg(10)) + .with(xreg_preg(11)) + .with(xreg_preg(12)) + .with(xreg_preg(13)) + .with(xreg_preg(14)) + .with(xreg_preg(15)) + .with(xreg_preg(16)) + .with(xreg_preg(17)) + // x18 is used to carry platform state and is not allocatable by Winch. + // + // x19 - x27 are considered caller-saved in Winch's calling convention. 
+ .with(xreg_preg(19)) + .with(xreg_preg(20)) + .with(xreg_preg(21)) + .with(xreg_preg(22)) + .with(xreg_preg(23)) + .with(xreg_preg(24)) + .with(xreg_preg(25)) + .with(xreg_preg(26)) + .with(xreg_preg(27)) + // x28 is used as the shadow stack pointer and is considered + // callee-saved. + // + // All vregs are considered caller-saved. + .with(vreg_preg(0)) + .with(vreg_preg(1)) + .with(vreg_preg(2)) + .with(vreg_preg(3)) + .with(vreg_preg(4)) + .with(vreg_preg(5)) + .with(vreg_preg(6)) + .with(vreg_preg(7)) + .with(vreg_preg(8)) + .with(vreg_preg(9)) + .with(vreg_preg(10)) + .with(vreg_preg(11)) + .with(vreg_preg(12)) + .with(vreg_preg(13)) + .with(vreg_preg(14)) + .with(vreg_preg(15)) + .with(vreg_preg(16)) + .with(vreg_preg(17)) + .with(vreg_preg(18)) + .with(vreg_preg(19)) + .with(vreg_preg(20)) + .with(vreg_preg(21)) + .with(vreg_preg(22)) + .with(vreg_preg(23)) + .with(vreg_preg(24)) + .with(vreg_preg(25)) + .with(vreg_preg(26)) + .with(vreg_preg(27)) + .with(vreg_preg(28)) + .with(vreg_preg(29)) + .with(vreg_preg(30)) + .with(vreg_preg(31)) +} + +const DEFAULT_AAPCS_CLOBBERS: PRegSet = default_aapcs_clobbers(); +const WINCH_CLOBBERS: PRegSet = winch_clobbers(); + +fn create_reg_env(enable_pinned_reg: bool) -> MachineEnv { + fn preg(r: Reg) -> PReg { + r.to_real_reg().unwrap().into() + } + + let mut env = MachineEnv { + preferred_regs_by_class: [ + vec![ + preg(xreg(0)), + preg(xreg(1)), + preg(xreg(2)), + preg(xreg(3)), + preg(xreg(4)), + preg(xreg(5)), + preg(xreg(6)), + preg(xreg(7)), + preg(xreg(8)), + preg(xreg(9)), + preg(xreg(10)), + preg(xreg(11)), + preg(xreg(12)), + preg(xreg(13)), + preg(xreg(14)), + preg(xreg(15)), + // x16 and x17 are spilltmp and tmp2 (see above). + // x18 could be used by the platform to carry inter-procedural state; + // conservatively assume so and make it not allocatable. + // x19-28 are callee-saved and so not preferred. + // x21 is the pinned register (if enabled) and not allocatable if so. + // x29 is FP, x30 is LR, x31 is SP/ZR. + ], + vec![ + preg(vreg(0)), + preg(vreg(1)), + preg(vreg(2)), + preg(vreg(3)), + preg(vreg(4)), + preg(vreg(5)), + preg(vreg(6)), + preg(vreg(7)), + // v8-15 are callee-saved and so not preferred. + preg(vreg(16)), + preg(vreg(17)), + preg(vreg(18)), + preg(vreg(19)), + preg(vreg(20)), + preg(vreg(21)), + preg(vreg(22)), + preg(vreg(23)), + preg(vreg(24)), + preg(vreg(25)), + preg(vreg(26)), + preg(vreg(27)), + preg(vreg(28)), + preg(vreg(29)), + preg(vreg(30)), + preg(vreg(31)), + ], + // Vector Regclass is unused + vec![], + ], + non_preferred_regs_by_class: [ + vec![ + preg(xreg(19)), + preg(xreg(20)), + // x21 is pinned reg if enabled; we add to this list below if not. + preg(xreg(22)), + preg(xreg(23)), + preg(xreg(24)), + preg(xreg(25)), + preg(xreg(26)), + preg(xreg(27)), + preg(xreg(28)), + ], + vec![ + preg(vreg(8)), + preg(vreg(9)), + preg(vreg(10)), + preg(vreg(11)), + preg(vreg(12)), + preg(vreg(13)), + preg(vreg(14)), + preg(vreg(15)), + ], + // Vector Regclass is unused + vec![], + ], + fixed_stack_slots: vec![], + scratch_by_class: [None, None, None], + }; + + if !enable_pinned_reg { + debug_assert_eq!(PINNED_REG, 21); // We assumed this above in hardcoded reg list. 
+ env.non_preferred_regs_by_class[0].push(preg(xreg(PINNED_REG))); + } + + env +} diff --git a/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/isa/aarch64/inst.isle b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/isa/aarch64/inst.isle new file mode 100644 index 000000000..7b82b1ce7 --- /dev/null +++ b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/isa/aarch64/inst.isle @@ -0,0 +1,5111 @@ +;; Instruction formats. +(type MInst + (enum + ;; A no-op of zero size. + (Nop0) + + ;; A no-op that is one instruction large. + (Nop4) + + ;; An ALU operation with two register sources and a register destination. + (AluRRR + (alu_op ALUOp) + (size OperandSize) + (rd WritableReg) + (rn Reg) + (rm Reg)) + + ;; An ALU operation with three register sources and a register destination. + (AluRRRR + (alu_op ALUOp3) + (size OperandSize) + (rd WritableReg) + (rn Reg) + (rm Reg) + (ra Reg)) + + ;; An ALU operation with a register source and an immediate-12 source, and a register + ;; destination. + (AluRRImm12 + (alu_op ALUOp) + (size OperandSize) + (rd WritableReg) + (rn Reg) + (imm12 Imm12)) + + ;; An ALU operation with a register source and an immediate-logic source, and a register destination. + (AluRRImmLogic + (alu_op ALUOp) + (size OperandSize) + (rd WritableReg) + (rn Reg) + (imml ImmLogic)) + + ;; An ALU operation with a register source and an immediate-shiftamt source, and a register destination. + (AluRRImmShift + (alu_op ALUOp) + (size OperandSize) + (rd WritableReg) + (rn Reg) + (immshift ImmShift)) + + ;; An ALU operation with two register sources, one of which can be shifted, and a register + ;; destination. + (AluRRRShift + (alu_op ALUOp) + (size OperandSize) + (rd WritableReg) + (rn Reg) + (rm Reg) + (shiftop ShiftOpAndAmt)) + + ;; An ALU operation with two register sources, one of which can be {zero,sign}-extended and + ;; shifted, and a register destination. + (AluRRRExtend + (alu_op ALUOp) + (size OperandSize) + (rd WritableReg) + (rn Reg) + (rm Reg) + (extendop ExtendOp)) + + ;; A bit op instruction with a single register source. + (BitRR + (op BitOp) + (size OperandSize) + (rd WritableReg) + (rn Reg)) + + ;; An unsigned (zero-extending) 8-bit load. + (ULoad8 + (rd WritableReg) + (mem AMode) + (flags MemFlags)) + + ;; A signed (sign-extending) 8-bit load. + (SLoad8 + (rd WritableReg) + (mem AMode) + (flags MemFlags)) + + ;; An unsigned (zero-extending) 16-bit load. + (ULoad16 + (rd WritableReg) + (mem AMode) + (flags MemFlags)) + + ;; A signed (sign-extending) 16-bit load. + (SLoad16 + (rd WritableReg) + (mem AMode) + (flags MemFlags)) + + ;; An unsigned (zero-extending) 32-bit load. + (ULoad32 + (rd WritableReg) + (mem AMode) + (flags MemFlags)) + + ;; A signed (sign-extending) 32-bit load. + (SLoad32 + (rd WritableReg) + (mem AMode) + (flags MemFlags)) + + ;; A 64-bit load. + (ULoad64 + (rd WritableReg) + (mem AMode) + (flags MemFlags)) + + ;; An 8-bit store. + (Store8 + (rd Reg) + (mem AMode) + (flags MemFlags)) + + ;; A 16-bit store. + (Store16 + (rd Reg) + (mem AMode) + (flags MemFlags)) + + ;; A 32-bit store. + (Store32 + (rd Reg) + (mem AMode) + (flags MemFlags)) + + ;; A 64-bit store. + (Store64 + (rd Reg) + (mem AMode) + (flags MemFlags)) + + ;; A store of a pair of registers. + (StoreP64 + (rt Reg) + (rt2 Reg) + (mem PairAMode) + (flags MemFlags)) + + ;; A load of a pair of registers. + (LoadP64 + (rt WritableReg) + (rt2 WritableReg) + (mem PairAMode) + (flags MemFlags)) + + ;; A MOV instruction. These are encoded as ORR's (AluRRR form). 
+ ;; The 32-bit version zeroes the top 32 bits of the + ;; destination, which is effectively an alias for an unsigned + ;; 32-to-64-bit extension. + (Mov + (size OperandSize) + (rd WritableReg) + (rm Reg)) + + ;; Like `Move` but with a particular `PReg` source (for implementing CLIF + ;; instructions like `get_stack_pointer`). + (MovFromPReg + (rd WritableReg) + (rm PReg)) + + ;; Like `Move` but with a particular `PReg` destination (for + ;; implementing CLIF instructions like `set_pinned_reg`). + (MovToPReg + (rd PReg) + (rm Reg)) + + ;; A MOV[Z,N] with a 16-bit immediate. + (MovWide + (op MoveWideOp) + (rd WritableReg) + (imm MoveWideConst) + (size OperandSize)) + + ;; A MOVK with a 16-bit immediate. Modifies its register; we + ;; model this with a separate input `rn` and output `rd` virtual + ;; register, with a regalloc constraint to tie them together. + (MovK + (rd WritableReg) + (rn Reg) + (imm MoveWideConst) + (size OperandSize)) + + + ;; A sign- or zero-extend operation. + (Extend + (rd WritableReg) + (rn Reg) + (signed bool) + (from_bits u8) + (to_bits u8)) + + ;; A conditional-select operation. + (CSel + (rd WritableReg) + (cond Cond) + (rn Reg) + (rm Reg)) + + ;; A conditional-select negation operation. + (CSNeg + (rd WritableReg) + (cond Cond) + (rn Reg) + (rm Reg)) + + ;; A conditional-set operation. + (CSet + (rd WritableReg) + (cond Cond)) + + ;; A conditional-set-mask operation. + (CSetm + (rd WritableReg) + (cond Cond)) + + ;; A conditional comparison with a second register. + (CCmp + (size OperandSize) + (rn Reg) + (rm Reg) + (nzcv NZCV) + (cond Cond)) + + ;; A conditional comparison with an immediate. + (CCmpImm + (size OperandSize) + (rn Reg) + (imm UImm5) + (nzcv NZCV) + (cond Cond)) + + ;; A synthetic insn, which is a load-linked store-conditional loop, that has the overall + ;; effect of atomically modifying a memory location in a particular way. Because we have + ;; no way to explain to the regalloc about earlyclobber registers, this instruction has + ;; completely fixed operand registers, and we rely on the RA's coalescing to remove copies + ;; in the surrounding code to the extent it can. Load- and store-exclusive instructions, + ;; with acquire-release semantics, are used to access memory. The operand conventions are: + ;; + ;; x25 (rd) address + ;; x26 (rd) second operand for `op` + ;; x27 (wr) old value + ;; x24 (wr) scratch reg; value afterwards has no meaning + ;; x28 (wr) scratch reg; value afterwards has no meaning + (AtomicRMWLoop + (ty Type) ;; I8, I16, I32 or I64 + (op AtomicRMWLoopOp) + (flags MemFlags) + (addr Reg) + (operand Reg) + (oldval WritableReg) + (scratch1 WritableReg) + (scratch2 WritableReg)) + + ;; Similar to AtomicRMWLoop, a compare-and-swap operation implemented using a load-linked + ;; store-conditional loop, with acquire-release semantics. + ;; Note that the operand conventions, although very similar to AtomicRMWLoop, are different: + ;; + ;; x25 (rd) address + ;; x26 (rd) expected value + ;; x28 (rd) replacement value + ;; x27 (wr) old value + ;; x24 (wr) scratch reg; value afterwards has no meaning + (AtomicCASLoop + (ty Type) ;; I8, I16, I32 or I64 + (flags MemFlags) + (addr Reg) + (expected Reg) + (replacement Reg) + (oldval WritableReg) + (scratch WritableReg)) + + ;; An atomic read-modify-write operation. These instructions require the + ;; Large System Extension (LSE) ISA support (FEAT_LSE). The instructions have + ;; acquire-release semantics. 
+ (AtomicRMW + (op AtomicRMWOp) + (rs Reg) + (rt WritableReg) + (rn Reg) + (ty Type) + (flags MemFlags)) + + ;; An atomic compare-and-swap operation. These instructions require the + ;; Large System Extension (LSE) ISA support (FEAT_LSE). The instructions have + ;; acquire-release semantics. + (AtomicCAS + ;; `rd` is really `rs` in the encoded instruction (so `rd` == `rs`); we separate + ;; them here to have separate use and def vregs for regalloc. + (rd WritableReg) + (rs Reg) + (rt Reg) + (rn Reg) + (ty Type) + (flags MemFlags)) + + ;; Read `access_ty` bits from address `rt`, either 8, 16, 32 or 64-bits, and put + ;; it in `rn`, optionally zero-extending to fill a word or double word result. + ;; This instruction is sequentially consistent. + (LoadAcquire + (access_ty Type) ;; I8, I16, I32 or I64 + (rt WritableReg) + (rn Reg) + (flags MemFlags)) + + ;; Write the lowest `ty` bits of `rt` to address `rn`. + ;; This instruction is sequentially consistent. + (StoreRelease + (access_ty Type) ;; I8, I16, I32 or I64 + (rt Reg) + (rn Reg) + (flags MemFlags)) + + ;; A memory fence. This must provide ordering to ensure that, at a minimum, neither loads + ;; nor stores may move forwards or backwards across the fence. Currently emitted as "dmb + ;; ish". This instruction is sequentially consistent. + (Fence) + + ;; Consumption of speculative data barrier. + (Csdb) + + ;; FPU 32-bit move. + (FpuMove32 + (rd WritableReg) + (rn Reg)) + + ;; FPU move. Note that this is distinct from a vector-register + ;; move; moving just 64 bits seems to be significantly faster. + (FpuMove64 + (rd WritableReg) + (rn Reg)) + + ;; Vector register move. + (FpuMove128 + (rd WritableReg) + (rn Reg)) + + ;; Move to scalar from a vector element. + (FpuMoveFromVec + (rd WritableReg) + (rn Reg) + (idx u8) + (size VectorSize)) + + ;; Zero-extend a SIMD & FP scalar to the full width of a vector register. + ;; 16-bit scalars require half-precision floating-point support (FEAT_FP16). + (FpuExtend + (rd WritableReg) + (rn Reg) + (size ScalarSize)) + + ;; 1-op FPU instruction. + (FpuRR + (fpu_op FPUOp1) + (size ScalarSize) + (rd WritableReg) + (rn Reg)) + + ;; 2-op FPU instruction. + (FpuRRR + (fpu_op FPUOp2) + (size ScalarSize) + (rd WritableReg) + (rn Reg) + (rm Reg)) + + (FpuRRI + (fpu_op FPUOpRI) + (rd WritableReg) + (rn Reg)) + + ;; Variant of FpuRRI that modifies its `rd`, and so we name the + ;; input state `ri` (for "input") and constrain the two + ;; together. + (FpuRRIMod + (fpu_op FPUOpRIMod) + (rd WritableReg) + (ri Reg) + (rn Reg)) + + + ;; 3-op FPU instruction. + ;; 16-bit scalars require half-precision floating-point support (FEAT_FP16). + (FpuRRRR + (fpu_op FPUOp3) + (size ScalarSize) + (rd WritableReg) + (rn Reg) + (rm Reg) + (ra Reg)) + + ;; FPU comparison. + (FpuCmp + (size ScalarSize) + (rn Reg) + (rm Reg)) + + ;; Floating-point load, half-precision (16 bit). + (FpuLoad16 + (rd WritableReg) + (mem AMode) + (flags MemFlags)) + + ;; Floating-point store, half-precision (16 bit). + (FpuStore16 + (rd Reg) + (mem AMode) + (flags MemFlags)) + + ;; Floating-point load, single-precision (32 bit). + (FpuLoad32 + (rd WritableReg) + (mem AMode) + (flags MemFlags)) + + ;; Floating-point store, single-precision (32 bit). + (FpuStore32 + (rd Reg) + (mem AMode) + (flags MemFlags)) + + ;; Floating-point load, double-precision (64 bit). + (FpuLoad64 + (rd WritableReg) + (mem AMode) + (flags MemFlags)) + + ;; Floating-point store, double-precision (64 bit). 
+ (FpuStore64 + (rd Reg) + (mem AMode) + (flags MemFlags)) + + ;; Floating-point/vector load, 128 bit. + (FpuLoad128 + (rd WritableReg) + (mem AMode) + (flags MemFlags)) + + ;; Floating-point/vector store, 128 bit. + (FpuStore128 + (rd Reg) + (mem AMode) + (flags MemFlags)) + + ;; A load of a pair of floating-point registers, double precision (64-bit). + (FpuLoadP64 + (rt WritableReg) + (rt2 WritableReg) + (mem PairAMode) + (flags MemFlags)) + + ;; A store of a pair of floating-point registers, double precision (64-bit). + (FpuStoreP64 + (rt Reg) + (rt2 Reg) + (mem PairAMode) + (flags MemFlags)) + + ;; A load of a pair of floating-point registers, 128-bit. + (FpuLoadP128 + (rt WritableReg) + (rt2 WritableReg) + (mem PairAMode) + (flags MemFlags)) + + ;; A store of a pair of floating-point registers, 128-bit. + (FpuStoreP128 + (rt Reg) + (rt2 Reg) + (mem PairAMode) + (flags MemFlags)) + + ;; Conversion: FP -> integer. + (FpuToInt + (op FpuToIntOp) + (rd WritableReg) + (rn Reg)) + + ;; Conversion: integer -> FP. + (IntToFpu + (op IntToFpuOp) + (rd WritableReg) + (rn Reg)) + + ;; FP conditional select, 16 bit. + ;; Requires FEAT_FP16. + (FpuCSel16 + (rd WritableReg) + (rn Reg) + (rm Reg) + (cond Cond)) + + ;; FP conditional select, 32 bit. + (FpuCSel32 + (rd WritableReg) + (rn Reg) + (rm Reg) + (cond Cond)) + + ;; FP conditional select, 64 bit. + (FpuCSel64 + (rd WritableReg) + (rn Reg) + (rm Reg) + (cond Cond)) + + ;; Round to integer. + (FpuRound + (op FpuRoundMode) + (rd WritableReg) + (rn Reg)) + + ;; Move from a GPR to a vector register. The scalar value is parked in the lowest lane + ;; of the destination, and all other lanes are zeroed out. Currently 16-, 32- and 64-bit + ;; transactions are supported. 16-bit moves require FEAT_FP16. + (MovToFpu + (rd WritableReg) + (rn Reg) + (size ScalarSize)) + + ;; Loads a floating-point immediate. + (FpuMoveFPImm + (rd WritableReg) + (imm ASIMDFPModImm) + (size ScalarSize)) + + ;; Move to a vector element from a GPR. + (MovToVec + (rd WritableReg) + (ri Reg) + (rn Reg) + (idx u8) + (size VectorSize)) + + ;; Unsigned move from a vector element to a GPR. + (MovFromVec + (rd WritableReg) + (rn Reg) + (idx u8) + (size ScalarSize)) + + ;; Signed move from a vector element to a GPR. + (MovFromVecSigned + (rd WritableReg) + (rn Reg) + (idx u8) + (size VectorSize) + (scalar_size OperandSize)) + + ;; Duplicate general-purpose register to vector. + (VecDup + (rd WritableReg) + (rn Reg) + (size VectorSize)) + + ;; Duplicate scalar to vector. + (VecDupFromFpu + (rd WritableReg) + (rn Reg) + (size VectorSize) + (lane u8)) + + ;; Duplicate FP immediate to vector. + (VecDupFPImm + (rd WritableReg) + (imm ASIMDFPModImm) + (size VectorSize)) + + ;; Duplicate immediate to vector. + (VecDupImm + (rd WritableReg) + (imm ASIMDMovModImm) + (invert bool) + (size VectorSize)) + + ;; Vector extend. + (VecExtend + (t VecExtendOp) + (rd WritableReg) + (rn Reg) + (high_half bool) + (lane_size ScalarSize)) + + ;; Move vector element to another vector element. + (VecMovElement + (rd WritableReg) + (ri Reg) + (rn Reg) + (dest_idx u8) + (src_idx u8) + (size VectorSize)) + + ;; Vector widening operation. + (VecRRLong + (op VecRRLongOp) + (rd WritableReg) + (rn Reg) + (high_half bool)) + + ;; Vector narrowing operation -- low half. + (VecRRNarrowLow + (op VecRRNarrowOp) + (rd WritableReg) + (rn Reg) + (lane_size ScalarSize)) + + ;; Vector narrowing operation -- high half. 
+ (VecRRNarrowHigh + (op VecRRNarrowOp) + (rd WritableReg) + (ri Reg) + (rn Reg) + (lane_size ScalarSize)) + + ;; 1-operand vector instruction that operates on a pair of elements. + (VecRRPair + (op VecPairOp) + (rd WritableReg) + (rn Reg)) + + ;; 2-operand vector instruction that produces a result with twice the + ;; lane width and half the number of lanes. + (VecRRRLong + (alu_op VecRRRLongOp) + (rd WritableReg) + (rn Reg) + (rm Reg) + (high_half bool)) + + ;; 2-operand vector instruction that produces a result with + ;; twice the lane width and half the number of lanes. Variant + ;; that modifies `rd` (so takes its initial state as `ri`). + (VecRRRLongMod + (alu_op VecRRRLongModOp) + (rd WritableReg) + (ri Reg) + (rn Reg) + (rm Reg) + (high_half bool)) + + ;; 1-operand vector instruction that extends elements of the input + ;; register and operates on a pair of elements. The output lane width + ;; is double that of the input. + (VecRRPairLong + (op VecRRPairLongOp) + (rd WritableReg) + (rn Reg)) + + ;; A vector ALU op. + (VecRRR + (alu_op VecALUOp) + (rd WritableReg) + (rn Reg) + (rm Reg) + (size VectorSize)) + + ;; A vector ALU op modifying a source register. + (VecRRRMod + (alu_op VecALUModOp) + (rd WritableReg) + (ri Reg) + (rn Reg) + (rm Reg) + (size VectorSize)) + + ;; A vector ALU op modifying a source register. + (VecFmlaElem + (alu_op VecALUModOp) + (rd WritableReg) + (ri Reg) + (rn Reg) + (rm Reg) + (size VectorSize) + (idx u8)) + + ;; Vector two register miscellaneous instruction. + (VecMisc + (op VecMisc2) + (rd WritableReg) + (rn Reg) + (size VectorSize)) + + ;; Vector instruction across lanes. + (VecLanes + (op VecLanesOp) + (rd WritableReg) + (rn Reg) + (size VectorSize)) + + ;; Vector shift by immediate Shift Left (immediate), Unsigned Shift Right (immediate) + ;; Signed Shift Right (immediate). These are somewhat unusual in that, for right shifts, + ;; the allowed range of `imm` values is 1 to lane-size-in-bits, inclusive. A zero + ;; right-shift cannot be encoded. Left shifts are "normal", though, having valid `imm` + ;; values from 0 to lane-size-in-bits - 1 inclusive. + (VecShiftImm + (op VecShiftImmOp) + (rd WritableReg) + (rn Reg) + (size VectorSize) + (imm u8)) + + ;; Destructive vector shift by immediate. + (VecShiftImmMod + (op VecShiftImmModOp) + (rd WritableReg) + (ri Reg) + (rn Reg) + (size VectorSize) + (imm u8)) + + ;; Vector extract - create a new vector, being the concatenation of the lowest `imm4` bytes + ;; of `rm` followed by the uppermost `16 - imm4` bytes of `rn`. + (VecExtract + (rd WritableReg) + (rn Reg) + (rm Reg) + (imm4 u8)) + + ;; Table vector lookup - single register table. The table + ;; consists of 8-bit elements and is stored in `rn`, while `rm` + ;; contains 8-bit element indices. This variant emits `TBL`, + ;; which sets elements that correspond to out-of-range indices + ;; (greater than 15) to 0. + (VecTbl + (rd WritableReg) + (rn Reg) + (rm Reg)) + + ;; Table vector lookup - single register table. The table + ;; consists of 8-bit elements and is stored in `rn`, while `rm` + ;; contains 8-bit element indices. This variant emits `TBX`, + ;; which leaves elements that correspond to out-of-range indices + ;; (greater than 15) unmodified. Hence, it takes an input vreg in + ;; `ri` that is constrained to the same allocation as `rd`. + (VecTblExt + (rd WritableReg) + (ri Reg) + (rn Reg) + (rm Reg)) + + ;; Table vector lookup - two register table. 
The table consists + ;; of 8-bit elements and is stored in `rn` and `rn2`, while + ;; `rm` contains 8-bit element indices. The table registers + ;; `rn` and `rn2` must have consecutive numbers modulo 32, that + ;; is v31 and v0 (in that order) are consecutive registers. + ;; This variant emits `TBL`, which sets out-of-range results to + ;; 0. + (VecTbl2 + (rd WritableReg) + (rn Reg) + (rn2 Reg) + (rm Reg)) + + ;; Table vector lookup - two register table. The table consists + ;; of 8-bit elements and is stored in `rn` and `rn2`, while + ;; `rm` contains 8-bit element indices. The table registers + ;; `rn` and `rn2` must have consecutive numbers modulo 32, that + ;; is v31 and v0 (in that order) are consecutive registers. + ;; This variant emits `TBX`, which leaves out-of-range results + ;; unmodified, hence takes the initial state of the result + ;; register in vreg `ri`. + (VecTbl2Ext + (rd WritableReg) + (ri Reg) + (rn Reg) + (rn2 Reg) + (rm Reg)) + + ;; Load an element and replicate to all lanes of a vector. + (VecLoadReplicate + (rd WritableReg) + (rn Reg) + (size VectorSize) + (flags MemFlags)) + + ;; Vector conditional select, 128 bit. A synthetic instruction, which generates a 4-insn + ;; control-flow diamond. + (VecCSel + (rd WritableReg) + (rn Reg) + (rm Reg) + (cond Cond)) + + ;; Move to the NZCV flags (actually a `MSR NZCV, Xn` insn). + (MovToNZCV + (rn Reg)) + + ;; Move from the NZCV flags (actually a `MRS Xn, NZCV` insn). + (MovFromNZCV + (rd WritableReg)) + + ;; A machine call instruction. N.B.: this allows only a +/- 128MB offset (it uses a relocation + ;; of type `Reloc::Arm64Call`); if the destination distance is not `RelocDistance::Near`, the + ;; code should use a `LoadExtName` / `CallInd` sequence instead, allowing an arbitrary 64-bit + ;; target. + (Call (info BoxCallInfo)) + + ;; A machine indirect-call instruction. + (CallInd (info BoxCallIndInfo)) + + ;; A return-call macro instruction. + (ReturnCall (info BoxReturnCallInfo)) + + ;; An indirect return-call macro instruction. + (ReturnCallInd (info BoxReturnCallIndInfo)) + + ;; A pseudo-instruction that captures register arguments in vregs. + (Args + (args VecArgPair)) + + ;; A pseudo-instruction that moves vregs to return registers. + (Rets + (rets VecRetPair)) + + ;; ---- branches (exactly one must appear at end of BB) ---- + + ;; A machine return instruction. + (Ret) + + ;; A machine return instruction with pointer authentication using SP as the + ;; modifier. This instruction requires pointer authentication support + ;; (FEAT_PAuth) unless `is_hint` is true, in which case it is equivalent to + ;; the combination of a no-op and a return instruction on platforms without + ;; the relevant support. + (AuthenticatedRet + (key APIKey) + (is_hint bool)) + + ;; An unconditional branch. + (Jump + (dest BranchTarget)) + + ;; A conditional branch. Contains two targets; at emission time, both are emitted, but + ;; the MachBuffer knows to truncate the trailing branch if fallthrough. We optimize the + ;; choice of taken/not_taken (inverting the branch polarity as needed) based on the + ;; fallthrough at the time of lowering. + (CondBr + (taken BranchTarget) + (not_taken BranchTarget) + (kind CondBrKind)) + + ;; A conditional branch which tests the `bit` of `rn` and branches + ;; depending on `kind`. + (TestBitAndBranch + (kind TestBitAndBranchKind) + (taken BranchTarget) + (not_taken BranchTarget) + (rn Reg) + (bit u8)) + + ;; A conditional trap: execute a `udf` if the condition is true. 
This is + ;; one VCode instruction because it uses embedded control flow; it is + ;; logically a single-in, single-out region, but needs to appear as one + ;; unit to the register allocator. + ;; + ;; The `CondBrKind` gives the conditional-branch condition that will + ;; *execute* the embedded `Inst`. (In the emitted code, we use the inverse + ;; of this condition in a branch that skips the trap instruction.) + (TrapIf + (kind CondBrKind) + (trap_code TrapCode)) + + ;; An indirect branch through a register, augmented with set of all + ;; possible successors. + (IndirectBr + (rn Reg) + (targets VecMachLabel)) + + ;; A "break" instruction, used for e.g. traps and debug breakpoints. + (Brk) + + ;; An instruction guaranteed to always be undefined and to trigger an illegal instruction at + ;; runtime. + (Udf + (trap_code TrapCode)) + + ;; Compute the address (using a PC-relative offset) of a memory location, using the `ADR` + ;; instruction. Note that we take a simple offset, not a `MemLabel`, here, because `Adr` is + ;; only used for now in fixed lowering sequences with hardcoded offsets. In the future we may + ;; need full `MemLabel` support. + (Adr + (rd WritableReg) + ;; Offset in range -2^20 .. 2^20. + (off i32)) + + ;; Compute the address (using a PC-relative offset) of a 4KB page. + (Adrp + (rd WritableReg) + (off i32)) + + ;; Raw 32-bit word, used for inline constants and jump-table entries. + (Word4 + (data u32)) + + ;; Raw 64-bit word, used for inline constants. + (Word8 + (data u64)) + + ;; Jump-table sequence, as one compound instruction (see note in lower_inst.rs for rationale). + (JTSequence + (default MachLabel) + (targets BoxVecMachLabel) + (ridx Reg) + (rtmp1 WritableReg) + (rtmp2 WritableReg)) + + ;; Load an inline symbol reference. + (LoadExtName + (rd WritableReg) + (name BoxExternalName) + (offset i64)) + + ;; Load address referenced by `mem` into `rd`. + (LoadAddr + (rd WritableReg) + (mem AMode)) + + ;; Pointer authentication code for instruction address with modifier in SP; + ;; equivalent to a no-op if Pointer authentication (FEAT_PAuth) is not + ;; supported. + (Paci + (key APIKey)) + + ;; Strip pointer authentication code from instruction address in LR; + ;; equivalent to a no-op if Pointer authentication (FEAT_PAuth) is not + ;; supported. + (Xpaclri) + + ;; Branch target identification; equivalent to a no-op if Branch Target + ;; Identification (FEAT_BTI) is not supported. + (Bti + (targets BranchTargetType)) + + ;; Meta-insn, no-op in generated code: emit constant/branch veneer island + ;; at this point (with a guard jump around it) if less than the needed + ;; space is available before the next branch deadline. See the `MachBuffer` + ;; implementation in `machinst/buffer.rs` for the overall algorithm. In + ;; brief, we retain a set of "pending/unresolved label references" from + ;; branches as we scan forward through instructions to emit machine code; + ;; if we notice we're about to go out of range on an unresolved reference, + ;; we stop, emit a bunch of "veneers" (branches in a form that has a longer + ;; range, e.g. a 26-bit-offset unconditional jump), and point the original + ;; label references to those. This is an "island" because it comes in the + ;; middle of the code. + ;; + ;; This meta-instruction is a necessary part of the logic that determines + ;; where to place islands. 
Ordinarily, we want to place them between basic + ;; blocks, so we compute the worst-case size of each block, and emit the + ;; island before starting a block if we would exceed a deadline before the + ;; end of the block. However, some sequences (such as an inline jumptable) + ;; are variable-length and not accounted for by this logic; so these + ;; lowered sequences include an `EmitIsland` to trigger island generation + ;; where necessary. + (EmitIsland + ;; The needed space before the next deadline. + (needed_space CodeOffset)) + + ;; A call to the `ElfTlsGetAddr` libcall. Returns address of TLS symbol in x0. + (ElfTlsGetAddr + (symbol BoxExternalName) + (rd WritableReg) + (tmp WritableReg)) + + (MachOTlsGetAddr + (symbol ExternalName) + (rd WritableReg)) + + ;; An unwind pseudo-instruction. + (Unwind + (inst UnwindInst)) + + ;; A dummy use, useful to keep a value alive. + (DummyUse + (reg Reg)) + + ;; Emits an inline stack probe loop. + ;; + ;; Note that this is emitted post-regalloc so `start` and `end` can be + ;; temporary registers such as the spilltmp and tmp2 registers. This also + ;; means that the internal codegen can't use these registers. + (StackProbeLoop (start WritableReg) + (end Reg) + (step Imm12)))) + +(model ALUOp (enum + (Add #x00) ;; 0 + (Sub #x01) + (Orr #x02) + (OrrNot #x03) + (And #x04) + (AndNot #x05) + (Eor #x06) + (EorNot #x07) + (SubS #x08) + (SDiv #x09) + (UDiv #x0a) + (Extr #x0b) + (Lsr #x0c) + (Asr #x0d) + (Lsl #x0e))) + +;; An ALU operation. This can be paired with several instruction formats +;; below (see `Inst`) in any combination. +(type ALUOp + (enum + (Add) + (Sub) + (Orr) + (OrrNot) + (And) + (AndS) + (AndNot) + ;; XOR (AArch64 calls this "EOR") + (Eor) + ;; XNOR (AArch64 calls this "EOR-NOT") + (EorNot) + ;; Add, setting flags + (AddS) + ;; Sub, setting flags + (SubS) + ;; Signed multiply, high-word result + (SMulH) + ;; Unsigned multiply, high-word result + (UMulH) + (SDiv) + (UDiv) + (Extr) + (Lsr) + (Asr) + (Lsl) + ;; Add with carry + (Adc) + ;; Add with carry, settings flags + (AdcS) + ;; Subtract with carry + (Sbc) + ;; Subtract with carry, settings flags + (SbcS) +)) + +;; An ALU operation with three arguments. 
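+;; For example, `MAdd` corresponds to the A64 `madd` instruction (rd = ra + rn * rm)
+;; and `MSub` to `msub` (rd = ra - rn * rm), while `UMAddL`/`SMAddL` are the widening
+;; 32x32 + 64 forms; see the `madd`, `msub`, `umaddl` and `smaddl` helpers below.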
+(type ALUOp3 + (enum + ;; Multiply-add + (MAdd) + ;; Multiply-sub + (MSub) + ;; Unsigned-Multiply-add + (UMAddL) + ;; Signed-Multiply-add + (SMAddL) +)) + +(type MoveWideOp + (enum + (MovZ) + (MovN) +)) + +(type UImm5 (primitive UImm5)) +(model Imm12 (type (bv 24))) +(type Imm12 (primitive Imm12)) +(model ImmLogic (type (bv 64))) +(type ImmLogic (primitive ImmLogic)) +(model ImmShift (type (bv 6))) +(type ImmShift (primitive ImmShift)) +(model ShiftOpAndAmt (type (bv 16))) +(type ShiftOpAndAmt (primitive ShiftOpAndAmt)) +(model MoveWideConst (type (bv 16))) +(type MoveWideConst (primitive MoveWideConst)) +(type NZCV (primitive NZCV)) +(type ASIMDFPModImm (primitive ASIMDFPModImm)) +(type ASIMDMovModImm (primitive ASIMDMovModImm)) +(type SImm7Scaled (primitive SImm7Scaled)) + +(type BoxCallInfo (primitive BoxCallInfo)) +(type BoxCallIndInfo (primitive BoxCallIndInfo)) +(type BoxReturnCallInfo (primitive BoxReturnCallInfo)) +(type BoxReturnCallIndInfo (primitive BoxReturnCallIndInfo)) +(type CondBrKind (primitive CondBrKind)) +(type BranchTarget (primitive BranchTarget)) +(type BoxJTSequenceInfo (primitive BoxJTSequenceInfo)) +(type CodeOffset (primitive CodeOffset)) +(type VecMachLabel extern (enum)) + +(model ExtendOp (enum + (UXTB #b000) + (UXTH #b001) + (UXTW #b010) + (UXTX #b011) + (SXTB #b100) + (SXTH #b101) + (SXTW #b110) + (SXTX #b111) +)) + +(type ExtendOp extern + (enum + (UXTB) + (UXTH) + (UXTW) + (UXTX) + (SXTB) + (SXTH) + (SXTW) + (SXTX) +)) + +;; An operation on the bits of a register. This can be paired with several instruction formats +;; below (see `Inst`) in any combination. +(type BitOp + (enum + ;; Bit reverse + (RBit) + (Clz) + (Cls) + ;; Byte reverse + (Rev16) + (Rev32) + (Rev64) +)) + +(type MemLabel extern (enum)) +(type SImm9 extern (enum)) +(type UImm12Scaled extern (enum)) + +;; An addressing mode specified for a load/store operation. +(type AMode + (enum + ;; + ;; Real ARM64 addressing modes: + ;; + ;; "post-indexed" mode as per AArch64 docs: postincrement reg after + ;; address computation. + ;; Specialized here to SP so we don't have to emit regalloc metadata. + (SPPostIndexed + (simm9 SImm9)) + + ;; "pre-indexed" mode as per AArch64 docs: preincrement reg before + ;; address computation. + ;; Specialized here to SP so we don't have to emit regalloc metadata. + (SPPreIndexed + (simm9 SImm9)) + + ;; N.B.: RegReg, RegScaled, and RegScaledExtended all correspond to + ;; what the ISA calls the "register offset" addressing mode. We split + ;; out several options here for more ergonomic codegen. + ;; + ;; Register plus register offset. + (RegReg + (rn Reg) + (rm Reg)) + + ;; Register plus register offset, scaled by type's size. + (RegScaled + (rn Reg) + (rm Reg)) + + ;; Register plus register offset, scaled by type's size, with index + ;; sign- or zero-extended first. + (RegScaledExtended + (rn Reg) + (rm Reg) + (extendop ExtendOp)) + + ;; Register plus register offset, with index sign- or zero-extended + ;; first. + (RegExtended + (rn Reg) + (rm Reg) + (extendop ExtendOp)) + + ;; Unscaled signed 9-bit immediate offset from reg. + (Unscaled + (rn Reg) + (simm9 SImm9)) + + ;; Scaled (by size of a type) unsigned 12-bit immediate offset from reg. + (UnsignedOffset + (rn Reg) + (uimm12 UImm12Scaled)) + + ;; virtual addressing modes that are lowered at emission time: + ;; + ;; Reference to a "label": e.g., a symbol. + (Label + (label MemLabel)) + + ;; Arbitrary offset from a register. 
Converted to generation of large + ;; offsets with multiple instructions as necessary during code emission. + (RegOffset + (rn Reg) + (off i64)) + + ;; Offset from the stack pointer. + (SPOffset + (off i64)) + + ;; Offset from the frame pointer. + (FPOffset + (off i64)) + + ;; A reference to a constant which is placed outside of the function's + ;; body, typically at the end. + (Const + (addr VCodeConstant)) + + ;; Offset from the beginning of the argument area to the argument + ;; referenced. This can only be determined when the function has been + ;; processed fully, as the size of the argument area after the prologue + ;; is only known once all return_call instructions in the function body + ;; have been processed. + (IncomingArg + (off i64)) + + ;; Offset into the slot area of the stack, which lies just above the + ;; outgoing argument area that's setup by the function prologue. + ;; At emission time, this is converted to `SPOffset` with a fixup added to + ;; the offset constant. The fixup is a running value that is tracked as + ;; emission iterates through instructions in linear order, and can be + ;; adjusted up and down with [Inst::VirtualSPOffsetAdj]. + ;; + ;; The standard ABI is in charge of handling this (by emitting the + ;; adjustment meta-instructions). See the diagram in the documentation + ;; for [crate::isa::aarch64::abi](the ABI module) for more details. + (SlotOffset + (off i64)))) + +;; A memory argument to a load/store-pair. +(type PairAMode (enum + ;; Signed, scaled 7-bit offset from a register. + (SignedOffset + (reg Reg) + (simm7 SImm7Scaled)) + + ;; Pre-increment register before address computation. + (SPPreIndexed (simm7 SImm7Scaled)) + + ;; Post-increment register after address computation. + (SPPostIndexed (simm7 SImm7Scaled)) +)) + +(type FPUOpRI extern (enum)) +(type FPUOpRIMod extern (enum)) + +(model OperandSize + (enum (Size32 32) + (Size64 64))) + +(type OperandSize extern + (enum Size32 + Size64)) + +(type TestBitAndBranchKind (enum (Z) (NZ))) + +;; Helper for calculating the `OperandSize` corresponding to a type +(spec (operand_size ty) + (provide + (= result (if (<= ty 32) 32 64))) + (require + (or (= ty 8) (= ty 16) (= ty 32) (= ty 64)))) +(instantiate operand_size + ((args Int) (ret Int) (canon (bv 8))) + ((args Int) (ret Int) (canon (bv 16))) + ((args Int) (ret Int) (canon (bv 32))) + ((args Int) (ret Int) (canon (bv 64))) +) +(decl operand_size (Type) OperandSize) +(rule operand_size_32 1 (operand_size (fits_in_32 _ty)) (OperandSize.Size32)) +(rule operand_size_64 (operand_size (fits_in_64 _ty)) (OperandSize.Size64)) + +(model ScalarSize + (enum (Size8 8) + (Size16 16) + (Size32 32) + (Size64 64) + (Size128 128))) + +;; Difference (32 - ty), useful for narrow calculations with 32-bit +;; instructions. 
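+;; For example, (diff_from_32 $I8) is 24: shifting an 8-bit value left by 24 moves it
+;; into the top byte of a 32-bit register, so a following 32-bit arithmetic shift
+;; right by 24 sign-extends it (an illustrative use, not the only one).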
+(decl diff_from_32 (Type) u8) +(rule (diff_from_32 $I8) 24) +(rule (diff_from_32 $I16) 16) + +(type ScalarSize extern + (enum Size8 + Size16 + Size32 + Size64 + Size128)) + +;; Helper for calculating the `ScalarSize` corresponding to a type +(decl scalar_size (Type) ScalarSize) + +(rule (scalar_size $I8) (ScalarSize.Size8)) +(rule (scalar_size $I16) (ScalarSize.Size16)) +(rule (scalar_size $I32) (ScalarSize.Size32)) +(rule (scalar_size $I64) (ScalarSize.Size64)) +(rule (scalar_size $I128) (ScalarSize.Size128)) + +(rule (scalar_size $F32) (ScalarSize.Size32)) +(rule (scalar_size $F64) (ScalarSize.Size64)) + +;; Helper for calculating the `ScalarSize` lane type from vector type +(decl lane_size (Type) ScalarSize) +(rule 1 (lane_size (multi_lane 8 _)) (ScalarSize.Size8)) +(rule 1 (lane_size (multi_lane 16 _)) (ScalarSize.Size16)) +(rule 1 (lane_size (multi_lane 32 _)) (ScalarSize.Size32)) +(rule 1 (lane_size (multi_lane 64 _)) (ScalarSize.Size64)) +(rule (lane_size (dynamic_lane 8 _)) (ScalarSize.Size8)) +(rule (lane_size (dynamic_lane 16 _)) (ScalarSize.Size16)) +(rule (lane_size (dynamic_lane 32 _)) (ScalarSize.Size32)) +(rule (lane_size (dynamic_lane 64 _)) (ScalarSize.Size64)) + +;; Helper for extracting the size of a lane from the input `VectorSize` +(decl pure vector_lane_size (VectorSize) ScalarSize) +(rule (vector_lane_size (VectorSize.Size8x16)) (ScalarSize.Size8)) +(rule (vector_lane_size (VectorSize.Size8x8)) (ScalarSize.Size8)) +(rule (vector_lane_size (VectorSize.Size16x8)) (ScalarSize.Size16)) +(rule (vector_lane_size (VectorSize.Size16x4)) (ScalarSize.Size16)) +(rule (vector_lane_size (VectorSize.Size32x4)) (ScalarSize.Size32)) +(rule (vector_lane_size (VectorSize.Size32x2)) (ScalarSize.Size32)) +(rule (vector_lane_size (VectorSize.Size64x2)) (ScalarSize.Size64)) + +(model Cond + (enum (Lo #x03) + (Hi #x08) + (Lt #x0b) + (Gt #x0c))) + +(type Cond extern + (enum + (Eq) + (Ne) + (Hs) + (Lo) + (Mi) + (Pl) + (Vs) + (Vc) + (Hi) + (Ls) + (Ge) + (Lt) + (Gt) + (Le) + (Al) + (Nv) +)) + +(model VectorSize + (enum + (Size8x8 #x00) + (Size8x16 #x01) + (Size16x4 #x02) + (Size16x8 #x03) + (Size32x2 #x04) + (Size32x4 #x05) + (Size64x2 #x06))) + +(type VectorSize extern + (enum + (Size8x8) + (Size8x16) + (Size16x4) + (Size16x8) + (Size32x2) + (Size32x4) + (Size64x2) +)) + +;; Helper for calculating the `VectorSize` corresponding to a type +(decl vector_size (Type) VectorSize) +(rule 1 (vector_size (multi_lane 8 8)) (VectorSize.Size8x8)) +(rule 1 (vector_size (multi_lane 8 16)) (VectorSize.Size8x16)) +(rule 1 (vector_size (multi_lane 16 4)) (VectorSize.Size16x4)) +(rule 1 (vector_size (multi_lane 16 8)) (VectorSize.Size16x8)) +(rule 1 (vector_size (multi_lane 32 2)) (VectorSize.Size32x2)) +(rule 1 (vector_size (multi_lane 32 4)) (VectorSize.Size32x4)) +(rule 1 (vector_size (multi_lane 64 2)) (VectorSize.Size64x2)) +(rule (vector_size (dynamic_lane 8 8)) (VectorSize.Size8x8)) +(rule (vector_size (dynamic_lane 8 16)) (VectorSize.Size8x16)) +(rule (vector_size (dynamic_lane 16 4)) (VectorSize.Size16x4)) +(rule (vector_size (dynamic_lane 16 8)) (VectorSize.Size16x8)) +(rule (vector_size (dynamic_lane 32 2)) (VectorSize.Size32x2)) +(rule (vector_size (dynamic_lane 32 4)) (VectorSize.Size32x4)) +(rule (vector_size (dynamic_lane 64 2)) (VectorSize.Size64x2)) + +;; A floating-point unit (FPU) operation with one arg. +(type FPUOp1 + (enum + (Abs) + (Neg) + (Sqrt) + (Cvt32To64) + (Cvt64To32) +)) + +;; A floating-point unit (FPU) operation with two args. 
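+;; Each variant is paired with a `ScalarSize` in `MInst.FpuRRR` (see `fpu_rrr`
+;; below); e.g. `Add` at `ScalarSize.Size64` is the scalar `fadd` on D registers.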
+(type FPUOp2 + (enum + (Add) + (Sub) + (Mul) + (Div) + (Max) + (Min) +)) + +;; A floating-point unit (FPU) operation with three args. +(type FPUOp3 + (enum + ;; Multiply-add + (MAdd) + ;; Multiply-sub + (MSub) + ;; Negated fused Multiply-add + (NMAdd) + ;; Negated fused Multiply-sub + (NMSub) +)) + +;; A conversion from an FP to an integer value. +(type FpuToIntOp + (enum + (F32ToU32) + (F32ToI32) + (F32ToU64) + (F32ToI64) + (F64ToU32) + (F64ToI32) + (F64ToU64) + (F64ToI64) +)) + +;; A conversion from an integer to an FP value. +(type IntToFpuOp + (enum + (U32ToF32) + (I32ToF32) + (U32ToF64) + (I32ToF64) + (U64ToF32) + (I64ToF32) + (U64ToF64) + (I64ToF64) +)) + +;; Modes for FP rounding ops: round down (floor) or up (ceil), or toward zero (trunc), or to +;; nearest, and for 32- or 64-bit FP values. +(type FpuRoundMode + (enum + (Minus32) + (Minus64) + (Plus32) + (Plus64) + (Zero32) + (Zero64) + (Nearest32) + (Nearest64) +)) + +;; Type of vector element extensions. +(type VecExtendOp + (enum + ;; Signed extension + (Sxtl) + ;; Unsigned extension + (Uxtl) +)) + +;; A vector ALU operation. +(type VecALUOp + (enum + ;; Signed saturating add + (Sqadd) + ;; Unsigned saturating add + (Uqadd) + ;; Signed saturating subtract + (Sqsub) + ;; Unsigned saturating subtract + (Uqsub) + ;; Compare bitwise equal + (Cmeq) + ;; Compare signed greater than or equal + (Cmge) + ;; Compare signed greater than + (Cmgt) + ;; Compare unsigned higher + (Cmhs) + ;; Compare unsigned higher or same + (Cmhi) + ;; Floating-point compare equal + (Fcmeq) + ;; Floating-point compare greater than + (Fcmgt) + ;; Floating-point compare greater than or equal + (Fcmge) + ;; Bitwise and + (And) + ;; Bitwise bit clear + (Bic) + ;; Bitwise inclusive or + (Orr) + ;; Bitwise exclusive or + (Eor) + ;; Unsigned maximum pairwise + (Umaxp) + ;; Add + (Add) + ;; Subtract + (Sub) + ;; Multiply + (Mul) + ;; Signed shift left + (Sshl) + ;; Unsigned shift left + (Ushl) + ;; Unsigned minimum + (Umin) + ;; Signed minimum + (Smin) + ;; Unsigned maximum + (Umax) + ;; Signed maximum + (Smax) + ;; Unsigned rounding halving add + (Urhadd) + ;; Floating-point add + (Fadd) + ;; Floating-point subtract + (Fsub) + ;; Floating-point divide + (Fdiv) + ;; Floating-point maximum + (Fmax) + ;; Floating-point minimum + (Fmin) + ;; Floating-point multiply + (Fmul) + ;; Add pairwise + (Addp) + ;; Zip vectors (primary) [meaning, high halves] + (Zip1) + ;; Zip vectors (secondary) + (Zip2) + ;; Signed saturating rounding doubling multiply returning high half + (Sqrdmulh) + ;; Unzip vectors (primary) + (Uzp1) + ;; Unzip vectors (secondary) + (Uzp2) + ;; Transpose vectors (primary) + (Trn1) + ;; Transpose vectors (secondary) + (Trn2) +)) + +;; A Vector ALU operation which modifies a source register. +(type VecALUModOp + (enum + ;; Bitwise select + (Bsl) + ;; Floating-point fused multiply-add vectors + (Fmla) + ;; Floating-point fused multiply-subtract vectors + (Fmls) +)) + +;; A Vector miscellaneous operation with two registers. 
+(type VecMisc2 + (enum + ;; Bitwise NOT + (Not) + ;; Negate + (Neg) + ;; Absolute value + (Abs) + ;; Floating-point absolute value + (Fabs) + ;; Floating-point negate + (Fneg) + ;; Floating-point square root + (Fsqrt) + ;; Reverse elements in 16-bit lanes + (Rev16) + ;; Reverse elements in 32-bit lanes + (Rev32) + ;; Reverse elements in 64-bit doublewords + (Rev64) + ;; Floating-point convert to signed integer, rounding toward zero + (Fcvtzs) + ;; Floating-point convert to unsigned integer, rounding toward zero + (Fcvtzu) + ;; Signed integer convert to floating-point + (Scvtf) + ;; Unsigned integer convert to floating-point + (Ucvtf) + ;; Floating point round to integral, rounding towards nearest + (Frintn) + ;; Floating point round to integral, rounding towards zero + (Frintz) + ;; Floating point round to integral, rounding towards minus infinity + (Frintm) + ;; Floating point round to integral, rounding towards plus infinity + (Frintp) + ;; Population count per byte + (Cnt) + ;; Compare bitwise equal to 0 + (Cmeq0) + ;; Compare signed greater than or equal to 0 + (Cmge0) + ;; Compare signed greater than 0 + (Cmgt0) + ;; Compare signed less than or equal to 0 + (Cmle0) + ;; Compare signed less than 0 + (Cmlt0) + ;; Floating point compare equal to 0 + (Fcmeq0) + ;; Floating point compare greater than or equal to 0 + (Fcmge0) + ;; Floating point compare greater than 0 + (Fcmgt0) + ;; Floating point compare less than or equal to 0 + (Fcmle0) + ;; Floating point compare less than 0 + (Fcmlt0) +)) + +;; A vector widening operation with one argument. +(type VecRRLongOp + (enum + ;; Floating-point convert to higher precision long, 16-bit elements + (Fcvtl16) + ;; Floating-point convert to higher precision long, 32-bit elements + (Fcvtl32) + ;; Shift left long (by element size), 8-bit elements + (Shll8) + ;; Shift left long (by element size), 16-bit elements + (Shll16) + ;; Shift left long (by element size), 32-bit elements + (Shll32) +)) + +;; A vector narrowing operation with one argument. +(type VecRRNarrowOp + (enum + ;; Extract narrow. + (Xtn) + ;; Signed saturating extract narrow. + (Sqxtn) + ;; Signed saturating extract unsigned narrow. + (Sqxtun) + ;; Unsigned saturating extract narrow. + (Uqxtn) + ;; Floating-point convert to lower precision narrow. + (Fcvtn) +)) + +(type VecRRRLongOp + (enum + ;; Signed multiply long. + (Smull8) + (Smull16) + (Smull32) + ;; Unsigned multiply long. + (Umull8) + (Umull16) + (Umull32) +)) + +(type VecRRRLongModOp + (enum + ;; Unsigned multiply add long + (Umlal8) + (Umlal16) + (Umlal32) +)) + +;; A vector operation on a pair of elements with one register. +(type VecPairOp + (enum + ;; Add pair of elements + (Addp) +)) + +;; 1-operand vector instruction that extends elements of the input register +;; and operates on a pair of elements. +(type VecRRPairLongOp + (enum + ;; Sign extend and add pair of elements + (Saddlp8) + (Saddlp16) + ;; Unsigned extend and add pair of elements + (Uaddlp8) + (Uaddlp16) +)) + +;; An operation across the lanes of vectors. +(type VecLanesOp + (enum + ;; Integer addition across a vector + (Addv) + ;; Unsigned minimum across a vector + (Uminv) +)) + +;; A shift-by-immediate operation on each lane of a vector. +(type VecShiftImmOp + (enum + ;; Unsigned shift left + (Shl) + ;; Unsigned shift right + (Ushr) + ;; Signed shift right + (Sshr) +)) + +;; Destructive shift-by-immediate operation on each lane of a vector. 
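+;; "Destructive" because the destination is also a source: `Sli` (A64 `SLI`) shifts
+;; each lane left and inserts the result, leaving the low `shift` bits of each
+;; destination lane unchanged.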
+(type VecShiftImmModOp + (enum + ;; Shift left and insert + (Sli) +)) + +;; Atomic read-modify-write operations with acquire-release semantics +(type AtomicRMWOp + (enum + (Add) + (Clr) + (Eor) + (Set) + (Smax) + (Smin) + (Umax) + (Umin) + (Swp) +)) + +;; Atomic read-modify-write operations, with acquire-release semantics, +;; implemented with a loop. +(type AtomicRMWLoopOp + (enum + (Add) + (Sub) + (And) + (Nand) + (Eor) + (Orr) + (Smax) + (Smin) + (Umax) + (Umin) + (Xchg) +)) + +;; Keys for instruction address PACs +(type APIKey + (enum + ;; API key A with the modifier of SP + (ASP) + ;; API key B with the modifier of SP + (BSP) + ;; API key A with the modifier of zero + (AZ) + ;; API key B with the modifier of zero + (BZ) +)) + +;; Branch target types +(type BranchTargetType + (enum + (None) + (C) + (J) + (JC) +)) + +;; Extractors for target features ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(decl pure partial sign_return_address_disabled () Unit) +(extern constructor sign_return_address_disabled sign_return_address_disabled) + +(decl use_lse () Inst) +(extern extractor use_lse use_lse) + +(decl pure use_fp16 () bool) +(extern constructor use_fp16 use_fp16) + +;; Extractor helpers for various immediate constants ;;;;;;;;;;;;;;;;;;;;;;;;;; + +(decl pure partial move_wide_const_from_u64 (Type u64) MoveWideConst) +(extern constructor move_wide_const_from_u64 move_wide_const_from_u64) + +(decl pure partial move_wide_const_from_inverted_u64 (Type u64) MoveWideConst) +(extern constructor move_wide_const_from_inverted_u64 move_wide_const_from_inverted_u64) + +(decl pure partial imm_logic_from_u64 (Type u64) ImmLogic) +(extern constructor imm_logic_from_u64 imm_logic_from_u64) + +(decl pure partial imm_size_from_type (Type) u16) +(extern constructor imm_size_from_type imm_size_from_type) + +(decl pure partial imm_logic_from_imm64 (Type Imm64) ImmLogic) +(extern constructor imm_logic_from_imm64 imm_logic_from_imm64) + +(spec (imm_shift_from_imm64 ty x) + (provide (= result (extract 5 0 (bvand x (bvsub (int2bv 64 ty) #x0000000000000001))))) + (require (bvult (bvand x (bvsub (int2bv 64 ty) #x0000000000000001)) #x0000000000000040))) + +(decl pure partial imm_shift_from_imm64 (Type Imm64) ImmShift) +(extern constructor imm_shift_from_imm64 imm_shift_from_imm64) + +(decl imm_shift_from_u8 (u8) ImmShift) +(extern constructor imm_shift_from_u8 imm_shift_from_u8) + +(spec (imm12_from_u64 imm12) + (provide (= result (zero_ext 64 imm12))) + (require + ; REVIEW(mbm): correct formulation of imm12? + (or + (= imm12 (bvand imm12 #x000fff)) + (= imm12 (bvand imm12 #xfff000)) + ) + ) +) +(decl imm12_from_u64 (Imm12) u64) +(extern extractor imm12_from_u64 imm12_from_u64) + +(decl u8_into_uimm5 (u8) UImm5) +(extern constructor u8_into_uimm5 u8_into_uimm5) + +(spec (u8_into_imm12 arg) + (provide (= result (zero_ext 24 arg)))) +(decl u8_into_imm12 (u8) Imm12) +(extern constructor u8_into_imm12 u8_into_imm12) + +(spec (u64_into_imm_logic ty a) + (provide (= result a)) + (require (or (= ty 32) (= ty 64)))) +(decl u64_into_imm_logic (Type u64) ImmLogic) +(extern constructor u64_into_imm_logic u64_into_imm_logic) + +(decl branch_target (MachLabel) BranchTarget) +(extern constructor branch_target branch_target) +(convert MachLabel BranchTarget branch_target) + +(decl targets_jt_space (BoxVecMachLabel) CodeOffset) +(extern constructor targets_jt_space targets_jt_space) + +;; Calculate the minimum floating-point bound for a conversion to floating +;; point from an integer type. 
+;; Accepts whether the output is signed, the size of the input +;; floating point type in bits, and the size of the output integer type +;; in bits. +(decl min_fp_value (bool u8 u8) Reg) +(extern constructor min_fp_value min_fp_value) + +;; Calculate the maximum floating-point bound for a conversion to floating +;; point from an integer type. +;; Accepts whether the output is signed, the size of the input +;; floating point type in bits, and the size of the output integer type +;; in bits. +(decl max_fp_value (bool u8 u8) Reg) +(extern constructor max_fp_value max_fp_value) + +;; Constructs an FPUOpRI.Ushr* given the size in bits of the value (or lane) +;; and the amount to shift by. +(decl fpu_op_ri_ushr (u8 u8) FPUOpRI) +(extern constructor fpu_op_ri_ushr fpu_op_ri_ushr) + +;; Constructs an FPUOpRIMod.Sli* given the size in bits of the value (or lane) +;; and the amount to shift by. +(decl fpu_op_ri_sli (u8 u8) FPUOpRIMod) +(extern constructor fpu_op_ri_sli fpu_op_ri_sli) + +(decl pure partial lshr_from_u64 (Type u64) ShiftOpAndAmt) +(extern constructor lshr_from_u64 lshr_from_u64) + +(spec (lshl_from_imm64 ty a) + (provide (= result (concat #x0e (extract 7 0 a)))) + (require (= (extract 63 8 a) #b00000000000000000000000000000000000000000000000000000000))) +(decl pure partial lshl_from_imm64 (Type Imm64) ShiftOpAndAmt) +(extern constructor lshl_from_imm64 lshl_from_imm64) + +(decl pure partial lshl_from_u64 (Type u64) ShiftOpAndAmt) +(extern constructor lshl_from_u64 lshl_from_u64) + +(decl pure partial ashr_from_u64 (Type u64) ShiftOpAndAmt) +(extern constructor ashr_from_u64 ashr_from_u64) + +(decl integral_ty (Type) Type) +(extern extractor integral_ty integral_ty) + +(decl valid_atomic_transaction (Type) Type) +(extern extractor valid_atomic_transaction valid_atomic_transaction) + +(decl pure partial is_zero_simm9 (SImm9) Unit) +(extern constructor is_zero_simm9 is_zero_simm9) + +(decl pure partial is_zero_uimm12 (UImm12Scaled) Unit) +(extern constructor is_zero_uimm12 is_zero_uimm12) + +;; Helper to go directly from a `Value`, when it's an `iconst`, to an `Imm12`. +; REVIEW(mbm): is imm12_from_value spec correct? +; NOTE(mbm): compare with https://github.com/avanhatt/wasmtime/blob/94ccb9d4d55a479893cb04bc796ec620ed24cee2/cranelift/codegen/src/isa/aarch64/inst.isle#L1867-L1874 +(spec (imm12_from_value imm12) + (provide + ; REVIEW(mbm): zero_ext vs conv_to? + (= result (conv_to (widthof result) (zero_ext 64 imm12))) + (= imm12 (conv_to (widthof imm12) (zero_ext 64 result))) + ) + (require + ; REVIEW(mbm): correct formulation of imm12? + (or + (= imm12 (bvand imm12 #x000fff)) + (= imm12 (bvand imm12 #xfff000)) + ) + ) +) +(decl imm12_from_value (Imm12) Value) +(extractor + (imm12_from_value n) + (iconst (u64_from_imm64 (imm12_from_u64 n)))) +;; Conceptually the same as `imm12_from_value`, but tries negating the constant +;; value (first sign-extending to handle narrow widths). +(spec (imm12_from_negated_value arg) + (provide + (= (bvneg (sign_ext 64 arg)) (zero_ext 64 result)) + ) + (require + ; REVIEW(mbm): correct formulation of imm12? 
+ (or + (= result (bvand result #x000fff)) + (= result (bvand result #xfff000)) + ) + ) +) + +(instantiate imm12_from_negated_value + ((args (bv 8)) (ret (bv 24)) (canon (bv 8))) + ((args (bv 16)) (ret (bv 24)) (canon (bv 16))) + ((args (bv 32)) (ret (bv 24)) (canon (bv 32))) + ((args (bv 64)) (ret (bv 24)) (canon (bv 64))) +) +(decl pure partial imm12_from_negated_value (Value) Imm12) +(rule imm12_from_negated_value + (imm12_from_negated_value (has_type ty (iconst n))) + (if-let (imm12_from_u64 imm) (i64_as_u64 (i64_neg (i64_sextend_imm64 ty n)))) + imm) + +;; Helper type to represent a value and an extend operation fused together. +(model ExtendedValue (type (bv 67))) +(type ExtendedValue extern (enum)) +;; Only including the i8 to i32 opcodes, based on the impl of extended_value_from_value +(spec (extended_value_from_value x) + (provide + (switch (extract 66 64 x) + ((ExtendOp.UXTB) (= (extract 63 0 x) (zero_ext 64 (extract 7 0 (zero_ext 64 result))))) + ((ExtendOp.UXTH) (= (extract 63 0 x) (zero_ext 64 (extract 15 0 (zero_ext 64 result))))) + ((ExtendOp.UXTW) (= (extract 63 0 x) (zero_ext 64 (extract 31 0 (zero_ext 64 result))))) + ((ExtendOp.SXTB) (= (extract 63 0 x) (sign_ext 64 (extract 7 0 (zero_ext 64 result))))) + ((ExtendOp.SXTH) (= (extract 63 0 x) (sign_ext 64 (extract 15 0 (zero_ext 64 result))))) + ((ExtendOp.SXTW) (= (extract 63 0 x) (sign_ext 64 (extract 31 0 (zero_ext 64 result))))))) + (require + (bvult (extract 66 64 x) #b110) + (not (= (extract 66 64 x) #b011)) + (= result (conv_to (widthof result) x)) + (or (= 8 (widthof result)) (= 16 (widthof result)) (= 32 (widthof result))))) +(decl extended_value_from_value (ExtendedValue) Value) +(extern extractor extended_value_from_value extended_value_from_value) + +;; Constructors used to poke at the fields of an `ExtendedValue`. +(decl put_extended_in_reg (ExtendedValue) Reg) +(extern constructor put_extended_in_reg put_extended_in_reg) +(decl get_extended_op (ExtendedValue) ExtendOp) +(extern constructor get_extended_op get_extended_op) + +(decl nzcv (bool bool bool bool) NZCV) +(extern constructor nzcv nzcv) + +(decl cond_br_zero (Reg OperandSize) CondBrKind) +(extern constructor cond_br_zero cond_br_zero) + +(decl cond_br_not_zero (Reg OperandSize) CondBrKind) +(extern constructor cond_br_not_zero cond_br_not_zero) + +(decl cond_br_cond (Cond) CondBrKind) +(extern constructor cond_br_cond cond_br_cond) + +;; Instruction creation helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Helper for creating the zero register. +(spec (zero_reg) (provide (= result #x0000000000000000))) +(decl zero_reg () Reg) +(extern constructor zero_reg zero_reg) + +(decl fp_reg () Reg) +(extern constructor fp_reg fp_reg) + +(decl stack_reg () Reg) +(extern constructor stack_reg stack_reg) + +(decl writable_link_reg () WritableReg) +(extern constructor writable_link_reg writable_link_reg) + +(decl writable_zero_reg () WritableReg) +(extern constructor writable_zero_reg writable_zero_reg) + +(decl value_regs_zero () ValueRegs) +(rule (value_regs_zero) + (value_regs + (imm $I64 (ImmExtend.Zero) 0) + (imm $I64 (ImmExtend.Zero) 0))) + + +;; Helper for emitting `MInst.Mov` instructions. +(decl mov (Reg Type) Reg) +(rule (mov src ty) + (let ((dst WritableReg (temp_writable_reg $I64)) + (_ Unit (emit (MInst.Mov (operand_size ty) dst src)))) + dst)) + +;; Helper for emitting `MInst.MovZ` instructions. 
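+;; `movz` places a 16-bit immediate at one of the 16-bit positions of the
+;; destination and zeroes the rest, while `movn` (below) materializes the bitwise
+;; NOT of that pattern; e.g. 0xffff_ffff_ffff_1234 is a single `movn` of 0xedcb.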
+(decl movz (MoveWideConst OperandSize) Reg) +(rule (movz imm size) + (let ((dst WritableReg (temp_writable_reg $I64)) + (_ Unit (emit (MInst.MovWide (MoveWideOp.MovZ) dst imm size)))) + dst)) + +;; Helper for emitting `MInst.MovN` instructions. +(decl movn (MoveWideConst OperandSize) Reg) +(rule (movn imm size) + (let ((dst WritableReg (temp_writable_reg $I64)) + (_ Unit (emit (MInst.MovWide (MoveWideOp.MovN) dst imm size)))) + dst)) + +;; Helper for emitting `MInst.AluRRImmLogic` instructions. +(decl alu_rr_imm_logic (ALUOp Type Reg ImmLogic) Reg) +(rule (alu_rr_imm_logic op ty src imm) + (let ((dst WritableReg (temp_writable_reg $I64)) + (_ Unit (emit (MInst.AluRRImmLogic op (operand_size ty) dst src imm)))) + dst)) + +;; Helper for emitting `MInst.AluRRImmShift` instructions. +(spec (alu_rr_imm_shift op t a b) + (provide + (= result (switch op + ((ALUOp.Lsr) + (if (<= t 32) + (conv_to 64 (bvlshr (extract 31 0 a) (bvand (bvsub (int2bv 32 32) #x00000001) (extract 31 0 (zero_ext 64 b))))) + (bvlshr a (bvand (bvsub (int2bv 64 64) #x0000000000000001) (zero_ext 64 b))))) + ((ALUOp.Asr) + (if (<= t 32) + (conv_to 64 (bvashr (extract 31 0 a) (bvand (bvsub (int2bv 32 32) #x00000001) (extract 31 0 (zero_ext 64 b))))) + (bvashr a (bvand (bvsub (int2bv 64 64) #x0000000000000001) (zero_ext 64 b))))) + ((ALUOp.Lsl) + (if (<= t 32) + (conv_to 64 (bvshl (extract 31 0 a) (bvand (bvsub (int2bv 32 32) #x00000001) (extract 31 0 (zero_ext 64 b))))) + (bvshl a (bvand (bvsub (int2bv 64 64) #x0000000000000001) (zero_ext 64 b)))))))) + (require + (or (= op (ALUOp.Lsr)) (= op (ALUOp.Asr)) (= op (ALUOp.Lsl))) + (or (= t 8) (= t 16) (= t 32) (= t 64)))) +(decl alu_rr_imm_shift (ALUOp Type Reg ImmShift) Reg) +(rule (alu_rr_imm_shift op ty src imm) + (let ((dst WritableReg (temp_writable_reg $I64)) + (_ Unit (emit (MInst.AluRRImmShift op (operand_size ty) dst src imm)))) + dst)) + +;; Helper for emitting `MInst.AluRRR` instructions. +(spec (alu_rrr op t a b) + (provide + (= result (switch op + ((ALUOp.Lsr) + (if (<= t 32) + (conv_to 64 (bvlshr (extract 31 0 a) (bvand (bvsub (int2bv 32 32) #x00000001) (extract 31 0 b)))) + (bvlshr a (bvand (bvsub (int2bv 64 64) #x0000000000000001) b)))) + ((ALUOp.Asr) + (if (<= t 32) + (conv_to 64 (bvashr (extract 31 0 a) (bvand (bvsub (int2bv 32 32) #x00000001) (extract 31 0 b)))) + (bvashr a (bvand (bvsub (int2bv 64 64) #x0000000000000001) b)))) + ((ALUOp.Lsl) + (if (<= t 32) + (conv_to 64 (bvshl (extract 31 0 a) (bvand (bvsub (int2bv 32 32) #x00000001) (extract 31 0 b)))) + (bvshl a (bvand (bvsub (int2bv 64 64) #x0000000000000001) b))))))) + (require + (or (= op (ALUOp.Lsr)) (= op (ALUOp.Asr)) (= op (ALUOp.Lsl))) + (or (= t 8) (= t 16) (= t 32) (= t 64)))) +(decl alu_rrr (ALUOp Type Reg Reg) Reg) +(rule (alu_rrr op ty src1 src2) + (let ((dst WritableReg (temp_writable_reg $I64)) + (_ Unit (emit (MInst.AluRRR op (operand_size ty) dst src1 src2)))) + dst)) + +;; Helper for emitting `MInst.VecRRR` instructions. +(decl vec_rrr (VecALUOp Reg Reg VectorSize) Reg) +(rule (vec_rrr op src1 src2 size) + (let ((dst WritableReg (temp_writable_reg $I8X16)) + (_ Unit (emit (MInst.VecRRR op dst src1 src2 size)))) + dst)) + +;; Helper for emitting `MInst.FpuRR` instructions. +(decl fpu_rr (FPUOp1 Reg ScalarSize) Reg) +(rule (fpu_rr op src size) + (let ((dst WritableReg (temp_writable_reg $F64)) + (_ Unit (emit (MInst.FpuRR op size dst src)))) + dst)) + +;; Helper for emitting `MInst.VecRRRMod` instructions which use three registers, +;; one of which is both source and output. 
+(decl vec_rrr_mod (VecALUModOp Reg Reg Reg VectorSize) Reg) +(rule (vec_rrr_mod op src1 src2 src3 size) + (let ((dst WritableReg (temp_writable_reg $I8X16)) + (_1 Unit (emit (MInst.VecRRRMod op dst src1 src2 src3 size)))) + dst)) + +;; Helper for emitting `MInst.VecFmlaElem` instructions which use three registers, +;; one of which is both source and output. +(decl vec_fmla_elem (VecALUModOp Reg Reg Reg VectorSize u8) Reg) +(rule (vec_fmla_elem op src1 src2 src3 size idx) + (let ((dst WritableReg (temp_writable_reg $I8X16)) + (_1 Unit (emit (MInst.VecFmlaElem op dst src1 src2 src3 size idx)))) + dst)) + +(decl fpu_rri (FPUOpRI Reg) Reg) +(rule (fpu_rri op src) + (let ((dst WritableReg (temp_writable_reg $F64)) + (_ Unit (emit (MInst.FpuRRI op dst src)))) + dst)) + +(decl fpu_rri_mod (FPUOpRIMod Reg Reg) Reg) +(rule (fpu_rri_mod op dst_src src) + (let ((dst WritableReg (temp_writable_reg $F64)) + (_ Unit (emit (MInst.FpuRRIMod op dst dst_src src)))) + dst)) + +;; Helper for emitting `MInst.FpuRRR` instructions. +(decl fpu_rrr (FPUOp2 Reg Reg ScalarSize) Reg) +(rule (fpu_rrr op src1 src2 size) + (let ((dst WritableReg (temp_writable_reg $F64)) + (_ Unit (emit (MInst.FpuRRR op size dst src1 src2)))) + dst)) + +;; Helper for emitting `MInst.FpuRRRR` instructions. +(decl fpu_rrrr (FPUOp3 ScalarSize Reg Reg Reg) Reg) +(rule (fpu_rrrr size op src1 src2 src3) + (let ((dst WritableReg (temp_writable_reg $F64)) + (_ Unit (emit (MInst.FpuRRRR size op dst src1 src2 src3)))) + dst)) + +;; Helper for emitting `MInst.FpuCmp` instructions. +(decl fpu_cmp (ScalarSize Reg Reg) ProducesFlags) +(rule (fpu_cmp size rn rm) + (ProducesFlags.ProducesFlagsSideEffect + (MInst.FpuCmp size rn rm))) + +;; Helper for emitting `MInst.VecLanes` instructions. +(decl vec_lanes (VecLanesOp Reg VectorSize) Reg) +(rule (vec_lanes op src size) + (let ((dst WritableReg (temp_writable_reg $I8X16)) + (_ Unit (emit (MInst.VecLanes op dst src size)))) + dst)) + +;; Helper for emitting `MInst.VecShiftImm` instructions. +(decl vec_shift_imm (VecShiftImmOp u8 Reg VectorSize) Reg) +(rule (vec_shift_imm op imm src size) + (let ((dst WritableReg (temp_writable_reg $I8X16)) + (_ Unit (emit (MInst.VecShiftImm op dst src size imm)))) + dst)) + +;; Helper for emitting `MInst.VecDup` instructions. +(decl vec_dup (Reg VectorSize) Reg) +(rule (vec_dup src size) + (let ((dst WritableReg (temp_writable_reg $I8X16)) + (_ Unit (emit (MInst.VecDup dst src size)))) + dst)) + +;; Helper for emitting `MInst.VecDupFromFpu` instructions. +(decl vec_dup_from_fpu (Reg VectorSize u8) Reg) +(rule (vec_dup_from_fpu src size lane) + (let ((dst WritableReg (temp_writable_reg $I8X16)) + (_ Unit (emit (MInst.VecDupFromFpu dst src size lane)))) + dst)) + +;; Helper for emitting `MInst.VecDupImm` instructions. +(decl vec_dup_imm (ASIMDMovModImm bool VectorSize) Reg) +(rule (vec_dup_imm imm invert size) + (let ((dst WritableReg (temp_writable_reg $I8X16)) + (_ Unit (emit (MInst.VecDupImm dst imm invert size)))) + dst)) + +;; Helper for emitting `MInst.AluRRImm12` instructions. +(decl alu_rr_imm12 (ALUOp Type Reg Imm12) Reg) +(rule (alu_rr_imm12 op ty src imm) + (let ((dst WritableReg (temp_writable_reg $I64)) + (_ Unit (emit (MInst.AluRRImm12 op (operand_size ty) dst src imm)))) + dst)) + +;; Helper for emitting `MInst.AluRRRShift` instructions. 
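+;; This is the three-register form with a shifted second source operand, e.g.
+;; `add x0, x1, x2, lsl #3`; `add_shift` and `sub_shift` below route through it,
+;; typically to fold a constant left shift of one operand into the ALU op.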
+(decl alu_rrr_shift (ALUOp Type Reg Reg ShiftOpAndAmt) Reg) +(rule (alu_rrr_shift op ty src1 src2 shift) + (let ((dst WritableReg (temp_writable_reg $I64)) + (_ Unit (emit (MInst.AluRRRShift op (operand_size ty) dst src1 src2 shift)))) + dst)) + +;; Helper for emitting `cmp` instructions, setting flags, with a right-shifted +;; second operand register. +(decl cmp_rr_shift (OperandSize Reg Reg u64) ProducesFlags) +(rule (cmp_rr_shift size src1 src2 shift_amount) + (if-let shift (lshr_from_u64 $I64 shift_amount)) + (ProducesFlags.ProducesFlagsSideEffect + (MInst.AluRRRShift (ALUOp.SubS) size (writable_zero_reg) + src1 src2 shift))) + +;; Helper for emitting `cmp` instructions, setting flags, with an arithmetic right-shifted +;; second operand register. +(decl cmp_rr_shift_asr (OperandSize Reg Reg u64) ProducesFlags) +(rule (cmp_rr_shift_asr size src1 src2 shift_amount) + (if-let shift (ashr_from_u64 $I64 shift_amount)) + (ProducesFlags.ProducesFlagsSideEffect + (MInst.AluRRRShift (ALUOp.SubS) size (writable_zero_reg) + src1 src2 shift))) + +;; Helper for emitting `MInst.AluRRRExtend` instructions. +(decl alu_rrr_extend (ALUOp Type Reg Reg ExtendOp) Reg) +(rule (alu_rrr_extend op ty src1 src2 extend) + (let ((dst WritableReg (temp_writable_reg $I64)) + (_ Unit (emit (MInst.AluRRRExtend op (operand_size ty) dst src1 src2 extend)))) + dst)) + +;; Same as `alu_rrr_extend`, but takes an `ExtendedValue` packed "pair" instead +;; of a `Reg` and an `ExtendOp`. +(decl alu_rr_extend_reg (ALUOp Type Reg ExtendedValue) Reg) +(rule (alu_rr_extend_reg op ty src1 extended_reg) + (let ((src2 Reg (put_extended_in_reg extended_reg)) + (extend ExtendOp (get_extended_op extended_reg))) + (alu_rrr_extend op ty src1 src2 extend))) + +;; Helper for emitting `MInst.AluRRRR` instructions. +(decl alu_rrrr (ALUOp3 Type Reg Reg Reg) Reg) +(rule (alu_rrrr op ty src1 src2 src3) + (let ((dst WritableReg (temp_writable_reg $I64)) + (_ Unit (emit (MInst.AluRRRR op (operand_size ty) dst src1 src2 src3)))) + dst)) + +;; Helper for emitting paired `MInst.AluRRR` instructions +(decl alu_rrr_with_flags_paired (Type Reg Reg ALUOp) ProducesFlags) +(rule (alu_rrr_with_flags_paired ty src1 src2 alu_op) + (let ((dst WritableReg (temp_writable_reg $I64))) + (ProducesFlags.ProducesFlagsReturnsResultWithConsumer + (MInst.AluRRR alu_op (operand_size ty) dst src1 src2) + dst))) + +;; Should only be used for AdcS and SbcS +(decl alu_rrr_with_flags_chained (Type Reg Reg ALUOp) ConsumesAndProducesFlags) +(rule (alu_rrr_with_flags_chained ty src1 src2 alu_op) + (let ((dst WritableReg (temp_writable_reg $I64))) + (ConsumesAndProducesFlags.ReturnsReg + (MInst.AluRRR alu_op (operand_size ty) dst src1 src2) + dst))) + +;; Helper for emitting `MInst.BitRR` instructions. +(decl bit_rr (BitOp Type Reg) Reg) +(rule (bit_rr op ty src) + (let ((dst WritableReg (temp_writable_reg $I64)) + (_ Unit (emit (MInst.BitRR op (operand_size ty) dst src)))) + dst)) + +;; Helper for emitting `adds` instructions. +(decl add_with_flags_paired (Type Reg Reg) ProducesFlags) +(rule (add_with_flags_paired ty src1 src2) + (let ((dst WritableReg (temp_writable_reg $I64))) + (ProducesFlags.ProducesFlagsReturnsResultWithConsumer + (MInst.AluRRR (ALUOp.AddS) (operand_size ty) dst src1 src2) + dst))) + +;; Helper for emitting `adc` instructions. 
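+;; Intended to be paired (via `with_flags`) with `add_with_flags_paired` above for
+;; 128-bit addition: `adds` yields the low 64 bits and the carry, and `adc` consumes
+;; the carry for the high 64 bits, mirroring `sub_i128` further below.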
+(decl adc_paired (Type Reg Reg) ConsumesFlags) +(rule (adc_paired ty src1 src2) + (let ((dst WritableReg (temp_writable_reg $I64))) + (ConsumesFlags.ConsumesFlagsReturnsResultWithProducer + (MInst.AluRRR (ALUOp.Adc) (operand_size ty) dst src1 src2) + dst))) + +;; Helper for emitting `subs` instructions. +(decl sub_with_flags_paired (Type Reg Reg) ProducesFlags) +(rule (sub_with_flags_paired ty src1 src2) + (let ((dst WritableReg (temp_writable_reg $I64))) + (ProducesFlags.ProducesFlagsReturnsResultWithConsumer + (MInst.AluRRR (ALUOp.SubS) (operand_size ty) dst src1 src2) + dst))) + +;; Helper for materializing a boolean value into a register from +;; flags. +(decl materialize_bool_result (Cond) ConsumesFlags) +(rule (materialize_bool_result cond) + (let ((dst WritableReg (temp_writable_reg $I64))) + (ConsumesFlags.ConsumesFlagsReturnsReg + (MInst.CSet dst cond) + dst))) + +(decl cmn_imm (OperandSize Reg Imm12) ProducesFlags) +(rule (cmn_imm size src1 src2) + (ProducesFlags.ProducesFlagsSideEffect + (MInst.AluRRImm12 (ALUOp.AddS) size (writable_zero_reg) + src1 src2))) + +(spec (cmp ty x y) + (provide (= result (subs ty x y))) + (require + (or (= ty 32) (= ty 64)))) +(decl cmp (OperandSize Reg Reg) ProducesFlags) +(rule (cmp size src1 src2) + (ProducesFlags.ProducesFlagsSideEffect + (MInst.AluRRR (ALUOp.SubS) size (writable_zero_reg) + src1 src2))) + +(spec (cmp_imm ty x y) + (provide (= result (subs ty x (zero_ext 64 y)))) + (require (or (= ty 32) (= ty 64)))) +(decl cmp_imm (OperandSize Reg Imm12) ProducesFlags) +(rule (cmp_imm size src1 src2) + (ProducesFlags.ProducesFlagsSideEffect + (MInst.AluRRImm12 (ALUOp.SubS) size (writable_zero_reg) + src1 src2))) + +(decl cmp64_imm (Reg Imm12) ProducesFlags) +(rule (cmp64_imm src1 src2) + (cmp_imm (OperandSize.Size64) src1 src2)) + +(spec (cmp_extend ty x y extend) + (provide + (= result + (subs ty x + (switch extend + ((ExtendOp.UXTB) (zero_ext 64 (extract 7 0 y))) + ((ExtendOp.UXTH) (zero_ext 64 (extract 15 0 y))) + ((ExtendOp.UXTW) (zero_ext 64 (extract 31 0 y))) + ((ExtendOp.UXTX) (zero_ext 64 (extract 63 0 y))) + ((ExtendOp.SXTB) (sign_ext 64 (extract 7 0 y))) + ((ExtendOp.SXTH) (sign_ext 64 (extract 15 0 y))) + ((ExtendOp.SXTW) (sign_ext 64 (extract 31 0 y))) + ((ExtendOp.SXTX) (sign_ext 64 (extract 63 0 y))))))) + (require (or (= ty 32) (= ty 64)))) +(decl cmp_extend (OperandSize Reg Reg ExtendOp) ProducesFlags) +(rule (cmp_extend size src1 src2 extend) + (ProducesFlags.ProducesFlagsSideEffect + (MInst.AluRRRExtend (ALUOp.SubS) size (writable_zero_reg) + src1 src2 extend))) + +;; Helper for emitting `sbc` instructions. +(decl sbc_paired (Type Reg Reg) ConsumesFlags) +(rule (sbc_paired ty src1 src2) + (let ((dst WritableReg (temp_writable_reg $I64))) + (ConsumesFlags.ConsumesFlagsReturnsResultWithProducer + (MInst.AluRRR (ALUOp.Sbc) (operand_size ty) dst src1 src2) + dst))) + +;; Helper for emitting `MInst.VecMisc` instructions. +(decl vec_misc (VecMisc2 Reg VectorSize) Reg) +(rule (vec_misc op src size) + (let ((dst WritableReg (temp_writable_reg $I8X16)) + (_ Unit (emit (MInst.VecMisc op dst src size)))) + dst)) + +;; Helper for emitting `MInst.VecTbl` instructions. 
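+;; `TBL` does a byte-wise table lookup: each byte of `rm` selects a byte of the
+;; table register `rn`, and out-of-range indices produce 0 (the `Ext` and `Tbl2`
+;; variants are described with the `MInst` enum above).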
+(decl vec_tbl (Reg Reg) Reg) +(rule (vec_tbl rn rm) + (let ((dst WritableReg (temp_writable_reg $I8X16)) + (_ Unit (emit (MInst.VecTbl dst rn rm)))) + dst)) + +(decl vec_tbl_ext (Reg Reg Reg) Reg) +(rule (vec_tbl_ext ri rn rm) + (let ((dst WritableReg (temp_writable_reg $I8X16)) + (_ Unit (emit (MInst.VecTblExt dst ri rn rm)))) + dst)) + +;; Helper for emitting `MInst.VecTbl2` instructions. +(decl vec_tbl2 (Reg Reg Reg Type) Reg) +(rule (vec_tbl2 rn rn2 rm ty) + (let ( + (dst WritableReg (temp_writable_reg $I8X16)) + (_ Unit (emit (MInst.VecTbl2 dst rn rn2 rm))) + ) + dst)) + +;; Helper for emitting `MInst.VecTbl2Ext` instructions. +(decl vec_tbl2_ext (Reg Reg Reg Reg Type) Reg) +(rule (vec_tbl2_ext ri rn rn2 rm ty) + (let ( + (dst WritableReg (temp_writable_reg $I8X16)) + (_ Unit (emit (MInst.VecTbl2Ext dst ri rn rn2 rm))) + ) + dst)) + +;; Helper for emitting `MInst.VecRRRLong` instructions. +(decl vec_rrr_long (VecRRRLongOp Reg Reg bool) Reg) +(rule (vec_rrr_long op src1 src2 high_half) + (let ((dst WritableReg (temp_writable_reg $I8X16)) + (_ Unit (emit (MInst.VecRRRLong op dst src1 src2 high_half)))) + dst)) + +;; Helper for emitting `MInst.VecRRPairLong` instructions. +(decl vec_rr_pair_long (VecRRPairLongOp Reg) Reg) +(rule (vec_rr_pair_long op src) + (let ((dst WritableReg (temp_writable_reg $I8X16)) + (_ Unit (emit (MInst.VecRRPairLong op dst src)))) + dst)) + +;; Helper for emitting `MInst.VecRRRLongMod` instructions. +(decl vec_rrrr_long (VecRRRLongModOp Reg Reg Reg bool) Reg) +(rule (vec_rrrr_long op src1 src2 src3 high_half) + (let ((dst WritableReg (temp_writable_reg $I8X16)) + (_ Unit (emit (MInst.VecRRRLongMod op dst src1 src2 src3 high_half)))) + dst)) + +;; Helper for emitting `MInst.VecRRNarrow` instructions. +(decl vec_rr_narrow_low (VecRRNarrowOp Reg ScalarSize) Reg) +(rule (vec_rr_narrow_low op src size) + (let ((dst WritableReg (temp_writable_reg $I8X16)) + (_ Unit (emit (MInst.VecRRNarrowLow op dst src size)))) + dst)) + +;; Helper for emitting `MInst.VecRRNarrow` instructions which update the +;; high half of the destination register. +(decl vec_rr_narrow_high (VecRRNarrowOp Reg Reg ScalarSize) Reg) +(rule (vec_rr_narrow_high op mod src size) + (let ((dst WritableReg (temp_writable_reg $I8X16)) + (_ Unit (emit (MInst.VecRRNarrowHigh op dst mod src size)))) + dst)) + +;; Helper for emitting `MInst.VecRRLong` instructions. +(decl vec_rr_long (VecRRLongOp Reg bool) Reg) +(rule (vec_rr_long op src high_half) + (let ((dst WritableReg (temp_writable_reg $I8X16)) + (_ Unit (emit (MInst.VecRRLong op dst src high_half)))) + dst)) + +;; Helper for emitting `MInst.FpuCSel16` / `MInst.FpuCSel32` / `MInst.FpuCSel64` +;; instructions. +(decl fpu_csel (Type Cond Reg Reg) ConsumesFlags) +(rule (fpu_csel $F16 cond if_true if_false) + (fpu_csel $F32 cond if_true if_false)) + +(rule 1 (fpu_csel $F16 cond if_true if_false) + (if-let true (use_fp16)) + (let ((dst WritableReg (temp_writable_reg $F16))) + (ConsumesFlags.ConsumesFlagsReturnsReg + (MInst.FpuCSel16 dst if_true if_false cond) + dst))) + +(rule (fpu_csel $F32 cond if_true if_false) + (let ((dst WritableReg (temp_writable_reg $F32))) + (ConsumesFlags.ConsumesFlagsReturnsReg + (MInst.FpuCSel32 dst if_true if_false cond) + dst))) + +(rule (fpu_csel $F64 cond if_true if_false) + (let ((dst WritableReg (temp_writable_reg $F64))) + (ConsumesFlags.ConsumesFlagsReturnsReg + (MInst.FpuCSel64 dst if_true if_false cond) + dst))) + +;; Helper for emitting `MInst.VecCSel` instructions. 
+(decl vec_csel (Cond Reg Reg) ConsumesFlags) +(rule (vec_csel cond if_true if_false) + (let ((dst WritableReg (temp_writable_reg $I8X16))) + (ConsumesFlags.ConsumesFlagsReturnsReg + (MInst.VecCSel dst if_true if_false cond) + dst))) + +;; Helper for emitting `MInst.FpuRound` instructions. +(decl fpu_round (FpuRoundMode Reg) Reg) +(rule (fpu_round op rn) + (let ((dst WritableReg (temp_writable_reg $F64)) + (_ Unit (emit (MInst.FpuRound op dst rn)))) + dst)) + +;; Helper for emitting `MInst.FpuMove64` and `MInst.FpuMove128` instructions. +(decl fpu_move (Type Reg) Reg) +(rule (fpu_move _ src) + (let ((dst WritableReg (temp_writable_reg $I8X16)) + (_ Unit (emit (MInst.FpuMove128 dst src)))) + dst)) +(rule 1 (fpu_move (fits_in_64 _) src) + (let ((dst WritableReg (temp_writable_reg $F64)) + (_ Unit (emit (MInst.FpuMove64 dst src)))) + dst)) + +;; Helper for emitting `MInst.MovToFpu` instructions. +(spec (mov_to_fpu x s) + (provide (= result (zero_ext 64 (conv_to s x))))) +(decl mov_to_fpu (Reg ScalarSize) Reg) +(rule (mov_to_fpu x size) + (let ((dst WritableReg (temp_writable_reg $I8X16)) + (_ Unit (emit (MInst.MovToFpu dst x size)))) + dst)) +(rule 1 (mov_to_fpu x (ScalarSize.Size16)) + (if-let false (use_fp16)) + (mov_to_fpu x (ScalarSize.Size32))) + +;; Helper for emitting `MInst.FpuMoveFPImm` instructions. +(decl fpu_move_fp_imm (ASIMDFPModImm ScalarSize) Reg) +(rule (fpu_move_fp_imm imm size) + (let ((dst WritableReg (temp_writable_reg $I8X16)) + (_ Unit (emit (MInst.FpuMoveFPImm dst imm size)))) + dst)) + +;; Helper for emitting `MInst.MovToVec` instructions. +(decl mov_to_vec (Reg Reg u8 VectorSize) Reg) +(rule (mov_to_vec src1 src2 lane size) + (let ((dst WritableReg (temp_writable_reg $I8X16)) + (_ Unit (emit (MInst.MovToVec dst src1 src2 lane size)))) + dst)) + +;; Helper for emitting `MInst.VecMovElement` instructions. +(decl mov_vec_elem (Reg Reg u8 u8 VectorSize) Reg) +(rule (mov_vec_elem src1 src2 dst_idx src_idx size) + (let ((dst WritableReg (temp_writable_reg $I8X16)) + (_ Unit (emit (MInst.VecMovElement dst src1 src2 dst_idx src_idx size)))) + dst)) + +;; Helper for emitting `MInst.MovFromVec` instructions. +(spec (mov_from_vec x i s) + (provide + (= result + (switch s + (8 + (switch i + (#x00 (zero_ext 64 (extract 7 0 x))) + (#x01 (zero_ext 64 (extract 15 8 x))) + (#x02 (zero_ext 64 (extract 23 16 x))) + (#x03 (zero_ext 64 (extract 31 24 x))) + (#x04 (zero_ext 64 (extract 39 32 x))) + (#x05 (zero_ext 64 (extract 47 40 x))) + (#x06 (zero_ext 64 (extract 55 48 x))) + (#x07 (zero_ext 64 (extract 63 56 x))))) + (16 + (switch i + (#x00 (zero_ext 64 (extract 15 0 x))) + (#x01 (zero_ext 64 (extract 31 16 x))) + (#x03 (zero_ext 64 (extract 47 32 x))) + (#x04 (zero_ext 64 (extract 63 48 x))))) + (32 + (switch i + (#x00 (zero_ext 64 (extract 31 0 x))) + (#x01 (zero_ext 64 (extract 63 32 x))))))))) +(decl mov_from_vec (Reg u8 ScalarSize) Reg) +(rule (mov_from_vec rn idx size) + (let ((dst WritableReg (temp_writable_reg $I64)) + (_ Unit (emit (MInst.MovFromVec dst rn idx size)))) + dst)) + +;; Helper for emitting `MInst.MovFromVecSigned` instructions. 
+(decl mov_from_vec_signed (Reg u8 VectorSize OperandSize) Reg) +(rule (mov_from_vec_signed rn idx size scalar_size) + (let ((dst WritableReg (temp_writable_reg $I64)) + (_ Unit (emit (MInst.MovFromVecSigned dst rn idx size scalar_size)))) + dst)) + +(decl fpu_move_from_vec (Reg u8 VectorSize) Reg) +(rule (fpu_move_from_vec rn idx size) + (let ((dst WritableReg (temp_writable_reg $I8X16)) + (_ Unit (emit (MInst.FpuMoveFromVec dst rn idx size)))) + dst)) + +;; Helper for emitting `MInst.Extend` instructions. +(spec (extend a b c d) + (provide + (if b + (= result (sign_ext (bv2int d) (conv_to (bv2int c) a))) + (= result (zero_ext (bv2int d) (conv_to (bv2int c) a)))))) +(decl extend (Reg bool u8 u8) Reg) +(rule (extend rn signed from_bits to_bits) + (let ((dst WritableReg (temp_writable_reg $I64)) + (_ Unit (emit (MInst.Extend dst rn signed from_bits to_bits)))) + dst)) + +;; Helper for emitting `MInst.FpuExtend` instructions. +(decl fpu_extend (Reg ScalarSize) Reg) +(rule (fpu_extend src size) + (let ((dst WritableReg (temp_writable_reg $F32X4)) + (_ Unit (emit (MInst.FpuExtend dst src size)))) + dst)) + +;; Helper for emitting `MInst.VecExtend` instructions. +(decl vec_extend (VecExtendOp Reg bool ScalarSize) Reg) +(rule (vec_extend op src high_half size) + (let ((dst WritableReg (temp_writable_reg $I8X16)) + (_ Unit (emit (MInst.VecExtend op dst src high_half size)))) + dst)) + +;; Helper for emitting `MInst.VecExtract` instructions. +(decl vec_extract (Reg Reg u8) Reg) +(rule (vec_extract src1 src2 idx) + (let ((dst WritableReg (temp_writable_reg $I8X16)) + (_ Unit (emit (MInst.VecExtract dst src1 src2 idx)))) + dst)) + +;; Helper for emitting `MInst.LoadAcquire` instructions. +(decl load_acquire (Type MemFlags Reg) Reg) +(rule (load_acquire ty flags addr) + (let ((dst WritableReg (temp_writable_reg $I64)) + (_ Unit (emit (MInst.LoadAcquire ty dst addr flags)))) + dst)) + +;; Helper for emitting `MInst.StoreRelease` instructions. +(decl store_release (Type MemFlags Reg Reg) SideEffectNoResult) +(rule (store_release ty flags src addr) + (SideEffectNoResult.Inst (MInst.StoreRelease ty src addr flags))) + +;; Helper for generating a `tst` instruction. +;; +;; Produces a `ProducesFlags` rather than a register or emitted instruction +;; which must be paired with `with_flags*` helpers. +(decl tst_imm (Type Reg ImmLogic) ProducesFlags) +(rule (tst_imm ty reg imm) + (ProducesFlags.ProducesFlagsSideEffect + (MInst.AluRRImmLogic (ALUOp.AndS) + (operand_size ty) + (writable_zero_reg) + reg + imm))) + +;; Helper for generating a `CSel` instruction. +;; +;; Note that this doesn't actually emit anything, instead it produces a +;; `ConsumesFlags` instruction which must be consumed with `with_flags*` +;; helpers. +(decl csel (Cond Reg Reg) ConsumesFlags) +(rule (csel cond if_true if_false) + (let ((dst WritableReg (temp_writable_reg $I64))) + (ConsumesFlags.ConsumesFlagsReturnsReg + (MInst.CSel dst cond if_true if_false) + dst))) + +;; Helper for constructing `cset` instructions. +(decl cset (Cond) ConsumesFlags) +(rule (cset cond) + (let ((dst WritableReg (temp_writable_reg $I64))) + (ConsumesFlags.ConsumesFlagsReturnsReg (MInst.CSet dst cond) dst))) + +;; Helper for constructing `cset` instructions, when the flags producer will +;; also return a value. 
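+;; Unlike `cset` above, this consumer is meant to be combined (via the `with_flags`
+;; helpers) with a flags producer that also returns a result, so that both values
+;; end up in the resulting `ValueRegs`.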
+(decl cset_paired (Cond) ConsumesFlags) +(rule (cset_paired cond) + (let ((dst WritableReg (temp_writable_reg $I64))) + (ConsumesFlags.ConsumesFlagsReturnsResultWithProducer (MInst.CSet dst cond) dst))) + +;; Helper for constructing `csetm` instructions. +(decl csetm (Cond) ConsumesFlags) +(rule (csetm cond) + (let ((dst WritableReg (temp_writable_reg $I64))) + (ConsumesFlags.ConsumesFlagsReturnsReg (MInst.CSetm dst cond) dst))) + +;; Helper for generating a `CSNeg` instruction. +;; +;; Note that this doesn't actually emit anything, instead it produces a +;; `ConsumesFlags` instruction which must be consumed with `with_flags*` +;; helpers. +(decl csneg (Cond Reg Reg) ConsumesFlags) +(rule (csneg cond if_true if_false) + (let ((dst WritableReg (temp_writable_reg $I64))) + (ConsumesFlags.ConsumesFlagsReturnsReg + (MInst.CSNeg dst cond if_true if_false) + dst))) + +;; Helper for generating `MInst.CCmp` instructions. +;; Creates a new `ProducesFlags` from the supplied `ProducesFlags` followed +;; immediately by the `MInst.CCmp` instruction. +(decl ccmp (OperandSize Reg Reg NZCV Cond ProducesFlags) ProducesFlags) +(rule (ccmp size rn rm nzcv cond inst_input) + (produces_flags_concat inst_input (ProducesFlags.ProducesFlagsSideEffect (MInst.CCmp size rn rm nzcv cond)))) + +;; Helper for generating `MInst.CCmpImm` instructions. +(decl ccmp_imm (OperandSize Reg UImm5 NZCV Cond) ConsumesFlags) +(rule 1 (ccmp_imm size rn imm nzcv cond) + (let ((dst WritableReg (temp_writable_reg $I64))) + (ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs + (MInst.CCmpImm size rn imm nzcv cond) + (MInst.CSet dst cond) + (value_reg dst)))) + +;; Helpers for generating `add` instructions. +(spec (add ty a b) + (provide + (= result + (if (<= ty 32) + (conv_to 64 (bvadd (extract 31 0 a) (extract 31 0 b))) + (bvadd a b))))) +(decl add (Type Reg Reg) Reg) +(rule (add ty x y) (alu_rrr (ALUOp.Add) ty x y)) + +(spec (add_imm ty a b) + (provide + (= result + (if (<= ty 32) + (conv_to 64 (bvadd (extract 31 0 a) (zero_ext 32 b))) + (bvadd a (zero_ext 64 b))))) + (require + (or + (= b (bvand b #x000fff)) + (= b (bvand b #xfff000))))) +(decl add_imm (Type Reg Imm12) Reg) +(rule (add_imm ty x y) (alu_rr_imm12 (ALUOp.Add) ty x y)) + +(spec (add_extend ty x y) + (provide + (= result + (if (<= ty 32) + (conv_to 64 (bvadd (extract 31 0 x) + (switch (extract 66 64 y) + ((ExtendOp.UXTB) (zero_ext 32 (extract 7 0 y))) + ((ExtendOp.UXTH) (zero_ext 32 (extract 15 0 y))) + ((ExtendOp.UXTW) (zero_ext 32 (extract 31 0 y))) + ((ExtendOp.UXTX) (zero_ext 32 (extract 31 0 y))) + ((ExtendOp.SXTB) (sign_ext 32 (extract 7 0 y))) + ((ExtendOp.SXTH) (sign_ext 32 (extract 15 0 y))) + ((ExtendOp.SXTW) (sign_ext 32 (extract 31 0 y))) + ((ExtendOp.SXTX) (sign_ext 32 (extract 31 0 y)))))) + (bvadd x + (switch (extract 66 64 y) + ((ExtendOp.UXTB) (zero_ext 64 (extract 7 0 y))) + ((ExtendOp.UXTH) (zero_ext 64 (extract 15 0 y))) + ((ExtendOp.UXTW) (zero_ext 64 (extract 31 0 y))) + ((ExtendOp.UXTX) (zero_ext 64 (extract 63 0 y))) + ((ExtendOp.SXTB) (sign_ext 64 (extract 7 0 y))) + ((ExtendOp.SXTH) (sign_ext 64 (extract 15 0 y))) + ((ExtendOp.SXTW) (sign_ext 64 (extract 31 0 y))) + ((ExtendOp.SXTX) (sign_ext 64 (extract 63 0 y))))))))) +(decl add_extend (Type Reg ExtendedValue) Reg) +(rule (add_extend ty x y) (alu_rr_extend_reg (ALUOp.Add) ty x y)) + +(decl add_extend_op (Type Reg Reg ExtendOp) Reg) +(rule (add_extend_op ty x y extend) (alu_rrr_extend (ALUOp.Add) ty x y extend)) + +(spec (add_shift ty a b shift) + (provide + (= result (if (<= ty 32) + (conv_to 
64 (bvadd (extract 31 0 a) + (switch (extract 15 8 shift) + ((ALUOp.Lsl) (bvshl (extract 31 0 b) (zero_ext 32 (bvand (bvsub (int2bv 8 ty) #x01) (extract 7 0 shift))))) + ((ALUOp.Lsr) (bvlshr (extract 31 0 b) (zero_ext 32 (bvand (bvsub (int2bv 8 ty) #x01) (extract 7 0 shift))))) + ((ALUOp.Asr) (bvashr (extract 31 0 b) (zero_ext 32 (bvand (bvsub (int2bv 8 ty) #x01) (extract 7 0 shift)))))))) + (bvadd a + (switch (extract 15 8 shift) + ((ALUOp.Lsl) (bvshl b (zero_ext 64 (bvand (bvsub (int2bv 8 ty) #x01) (extract 7 0 shift))))) + ((ALUOp.Lsr) (bvlshr b (zero_ext 64 (bvand (bvsub (int2bv 8 ty) #x01) (extract 7 0 shift))))) + ((ALUOp.Asr) (bvashr b (zero_ext 64 (bvand (bvsub (int2bv 8 ty) #x01) (extract 7 0 shift))))))))))) +(decl add_shift (Type Reg Reg ShiftOpAndAmt) Reg) +(rule (add_shift ty x y z) (alu_rrr_shift (ALUOp.Add) ty x y z)) + +(decl add_vec (Reg Reg VectorSize) Reg) +(rule (add_vec x y size) (vec_rrr (VecALUOp.Add) x y size)) + +;; Helpers for generating `sub` instructions. +(spec (sub ty a b) + (provide + (= result + (if (<= ty 32) + (conv_to 64 (bvsub (extract 31 0 a) (extract 31 0 b))) + (bvsub a b))))) +(decl sub (Type Reg Reg) Reg) +(rule (sub ty x y) (alu_rrr (ALUOp.Sub) ty x y)) + +(spec (sub_imm ty a b) + (provide + (= result + (if (<= ty 32) + (conv_to 64 (bvsub (extract 31 0 a) (zero_ext 32 b))) + (bvsub a (zero_ext 64 b))))) + (require + (or + (= b (bvand b #x000fff)) + (= b (bvand b #xfff000))))) +(decl sub_imm (Type Reg Imm12) Reg) +(rule (sub_imm ty x y) (alu_rr_imm12 (ALUOp.Sub) ty x y)) + +(spec (sub_extend ty x y) + (provide + (= result + (if (<= ty 32) + (conv_to 64 (bvsub (extract 31 0 x) + (switch (extract 66 64 y) + ((ExtendOp.UXTB) (zero_ext 32 (extract 7 0 y))) + ((ExtendOp.UXTH) (zero_ext 32 (extract 15 0 y))) + ((ExtendOp.UXTW) (zero_ext 32 (extract 31 0 y))) + ((ExtendOp.UXTX) (zero_ext 32 (extract 31 0 y))) + ((ExtendOp.SXTB) (sign_ext 32 (extract 7 0 y))) + ((ExtendOp.SXTH) (sign_ext 32 (extract 15 0 y))) + ((ExtendOp.SXTW) (sign_ext 32 (extract 31 0 y))) + ((ExtendOp.SXTX) (sign_ext 32 (extract 31 0 y)))))) + (bvsub x + (switch (extract 66 64 y) + ((ExtendOp.UXTB) (zero_ext 64 (extract 7 0 y))) + ((ExtendOp.UXTH) (zero_ext 64 (extract 15 0 y))) + ((ExtendOp.UXTW) (zero_ext 64 (extract 31 0 y))) + ((ExtendOp.UXTX) (zero_ext 64 (extract 63 0 y))) + ((ExtendOp.SXTB) (sign_ext 64 (extract 7 0 y))) + ((ExtendOp.SXTH) (sign_ext 64 (extract 15 0 y))) + ((ExtendOp.SXTW) (sign_ext 64 (extract 31 0 y))) + ((ExtendOp.SXTX) (sign_ext 64 (extract 63 0 y))))))))) +(decl sub_extend (Type Reg ExtendedValue) Reg) +(rule (sub_extend ty x y) (alu_rr_extend_reg (ALUOp.Sub) ty x y)) + +(spec (sub_shift ty a b shift) + (provide + (= result (if (<= ty 32) + (conv_to 64 (bvsub (extract 31 0 a) (switch (extract 15 8 shift) + ((ALUOp.Lsl) (bvshl (extract 31 0 b) (zero_ext 32 (bvand (bvsub (int2bv 8 ty) #x01) (extract 7 0 shift))))) + ((ALUOp.Lsr) (bvlshr (extract 31 0 b) (zero_ext 32 (bvand (bvsub (int2bv 8 ty) #x01) (extract 7 0 shift))))) + ((ALUOp.Asr) (bvashr (extract 31 0 b) (zero_ext 32 (bvand (bvsub (int2bv 8 ty) #x01) (extract 7 0 shift)))))))) + (bvsub a (switch (extract 15 8 shift) + ((ALUOp.Lsl) (bvshl b (zero_ext 64 (bvand (bvsub (int2bv 8 ty) #x01) (extract 7 0 shift))))) + ((ALUOp.Lsr) (bvlshr b (zero_ext 64 (bvand (bvsub (int2bv 8 ty) #x01) (extract 7 0 shift))))) + ((ALUOp.Asr) (bvashr b (zero_ext 64 (bvand (bvsub (int2bv 8 ty) #x01) (extract 7 0 shift))))))))))) +(decl sub_shift (Type Reg Reg ShiftOpAndAmt) Reg) +(rule (sub_shift ty x y z) (alu_rrr_shift 
(ALUOp.Sub) ty x y z)) + +(decl sub_vec (Reg Reg VectorSize) Reg) +(rule (sub_vec x y size) (vec_rrr (VecALUOp.Sub) x y size)) + +(decl sub_i128 (ValueRegs ValueRegs) ValueRegs) +(rule (sub_i128 x y) + (let + ;; Get the high/low registers for `x`. + ((x_regs ValueRegs x) + (x_lo Reg (value_regs_get x_regs 0)) + (x_hi Reg (value_regs_get x_regs 1)) + + ;; Get the high/low registers for `y`. + (y_regs ValueRegs y) + (y_lo Reg (value_regs_get y_regs 0)) + (y_hi Reg (value_regs_get y_regs 1))) + ;; the actual subtraction is `subs` followed by `sbc` which comprises + ;; the low/high bits of the result + (with_flags + (sub_with_flags_paired $I64 x_lo y_lo) + (sbc_paired $I64 x_hi y_hi)))) + +;; Helpers for generating `madd` instructions. +(spec (madd ty a b c) + (provide + (= result + (if (<= ty 32) + (conv_to 64 (bvadd (extract 31 0 c) (bvmul (extract 31 0 a) (extract 31 0 b)))) + (bvadd c (bvmul a b)))))) +(decl madd (Type Reg Reg Reg) Reg) +(rule (madd ty x y z) (alu_rrrr (ALUOp3.MAdd) ty x y z)) + +;; Helpers for generating `msub` instructions. +(spec (msub ty a b c) + (provide + (= result + (if (<= ty 32) + (conv_to 64 (bvsub (extract 31 0 c) (bvmul (extract 31 0 a) (extract 31 0 b)))) + (bvsub c (bvmul a b)))))) +(decl msub (Type Reg Reg Reg) Reg) +(rule (msub ty x y z) (alu_rrrr (ALUOp3.MSub) ty x y z)) + +;; Helpers for generating `umaddl` instructions +(decl umaddl (Reg Reg Reg) Reg) +(rule (umaddl x y z) (alu_rrrr (ALUOp3.UMAddL) $I32 x y z)) + +;; Helpers for generating `smaddl` instructions +(decl smaddl (Reg Reg Reg) Reg) +(rule (smaddl x y z) (alu_rrrr (ALUOp3.SMAddL) $I32 x y z)) + +;; Helper for generating `uqadd` instructions. +(decl uqadd (Reg Reg VectorSize) Reg) +(rule (uqadd x y size) (vec_rrr (VecALUOp.Uqadd) x y size)) + +;; Helper for generating `sqadd` instructions. +(decl sqadd (Reg Reg VectorSize) Reg) +(rule (sqadd x y size) (vec_rrr (VecALUOp.Sqadd) x y size)) + +;; Helper for generating `uqsub` instructions. +(decl uqsub (Reg Reg VectorSize) Reg) +(rule (uqsub x y size) (vec_rrr (VecALUOp.Uqsub) x y size)) + +;; Helper for generating `sqsub` instructions. +(decl sqsub (Reg Reg VectorSize) Reg) +(rule (sqsub x y size) (vec_rrr (VecALUOp.Sqsub) x y size)) + +;; Helper for generating `umulh` instructions. +(decl umulh (Type Reg Reg) Reg) +(rule (umulh ty x y) (alu_rrr (ALUOp.UMulH) ty x y)) + +;; Helper for generating `smulh` instructions. +(decl smulh (Type Reg Reg) Reg) +(rule (smulh ty x y) (alu_rrr (ALUOp.SMulH) ty x y)) + +;; Helper for generating `mul` instructions. +(decl mul (Reg Reg VectorSize) Reg) +(rule (mul x y size) (vec_rrr (VecALUOp.Mul) x y size)) + +;; Helper for generating `neg` instructions. +(decl neg (Reg VectorSize) Reg) +(rule (neg x size) (vec_misc (VecMisc2.Neg) x size)) + +;; Helper for generating `rev16` instructions. +(decl rev16 (Reg VectorSize) Reg) +(rule (rev16 x size) (vec_misc (VecMisc2.Rev16) x size)) + +;; Helper for generating `rev32` instructions. +(decl rev32 (Reg VectorSize) Reg) +(rule (rev32 x size) (vec_misc (VecMisc2.Rev32) x size)) + +;; Helper for generating `rev64` instructions. +(decl rev64 (Reg VectorSize) Reg) +(rule (rev64 x size) (vec_misc (VecMisc2.Rev64) x size)) + +;; Helper for generating `xtn` instructions. +(decl xtn (Reg ScalarSize) Reg) +(rule (xtn x size) (vec_rr_narrow_low (VecRRNarrowOp.Xtn) x size)) + +;; Helper for generating `fcvtn` instructions. +(decl fcvtn (Reg ScalarSize) Reg) +(rule (fcvtn x size) (vec_rr_narrow_low (VecRRNarrowOp.Fcvtn) x size)) + +;; Helper for generating `sqxtn` instructions. 
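+;; `sqxtn` is a signed saturating narrow: each source lane is clamped to the
+;; signed range of the half-width lane before being written (e.g. an i32 lane
+;; holding 0x0001_0000 narrows to the i16 lane 0x7fff). The plain helpers
+;; below use `vec_rr_narrow_low` and fill the low half of the destination;
+;; the `*2` variants use `vec_rr_narrow_high` to fill the high half.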
+(decl sqxtn (Reg ScalarSize) Reg) +(rule (sqxtn x size) (vec_rr_narrow_low (VecRRNarrowOp.Sqxtn) x size)) + +;; Helper for generating `sqxtn2` instructions. +(decl sqxtn2 (Reg Reg ScalarSize) Reg) +(rule (sqxtn2 x y size) (vec_rr_narrow_high (VecRRNarrowOp.Sqxtn) x y size)) + +;; Helper for generating `sqxtun` instructions. +(decl sqxtun (Reg ScalarSize) Reg) +(rule (sqxtun x size) (vec_rr_narrow_low (VecRRNarrowOp.Sqxtun) x size)) + +;; Helper for generating `sqxtun2` instructions. +(decl sqxtun2 (Reg Reg ScalarSize) Reg) +(rule (sqxtun2 x y size) (vec_rr_narrow_high (VecRRNarrowOp.Sqxtun) x y size)) + +;; Helper for generating `uqxtn` instructions. +(decl uqxtn (Reg ScalarSize) Reg) +(rule (uqxtn x size) (vec_rr_narrow_low (VecRRNarrowOp.Uqxtn) x size)) + +;; Helper for generating `uqxtn2` instructions. +(decl uqxtn2 (Reg Reg ScalarSize) Reg) +(rule (uqxtn2 x y size) (vec_rr_narrow_high (VecRRNarrowOp.Uqxtn) x y size)) + +;; Helper for generating `fence` instructions. +(decl aarch64_fence () SideEffectNoResult) +(rule (aarch64_fence) + (SideEffectNoResult.Inst (MInst.Fence))) + +;; Helper for generating `csdb` instructions. +(decl csdb () SideEffectNoResult) +(rule (csdb) + (SideEffectNoResult.Inst (MInst.Csdb))) + +;; Helper for generating `brk` instructions. +(decl brk () SideEffectNoResult) +(rule (brk) + (SideEffectNoResult.Inst (MInst.Brk))) + +;; Helper for generating `addp` instructions. +(spec (addp x y s) + (provide + (= result + (switch s + (#x00 (concat + (bvadd (extract 55 48 x) (extract 63 56 x)) + (bvadd (extract 39 32 x) (extract 47 40 x)) + (bvadd (extract 23 16 x) (extract 31 24 x)) + (bvadd (extract 7 0 x) (extract 15 8 x)) + (bvadd (extract 55 48 y) (extract 63 56 y)) + (bvadd (extract 39 32 y) (extract 47 40 y)) + (bvadd (extract 23 16 y) (extract 31 24 y)) + (bvadd (extract 7 0 y) (extract 15 8 y)))) + (#x01 (concat + (bvadd (extract 47 32 x) (extract 63 48 x)) + (bvadd (extract 15 0 x) (extract 31 16 x)) + (bvadd (extract 47 32 y) (extract 63 48 y)) + (bvadd (extract 15 0 y) (extract 31 16 y)))) + (#x02 (concat + (bvadd (extract 31 0 x) (extract 63 32 x)) + (bvadd (extract 31 0 y) (extract 63 32 y))))))) + (require (or (= s #x00) (= s #x01) (= s #x02)))) +(decl addp (Reg Reg VectorSize) Reg) +(rule (addp x y size) (vec_rrr (VecALUOp.Addp) x y size)) + +;; Helper for generating `zip1` instructions. +(decl zip1 (Reg Reg VectorSize) Reg) +(rule (zip1 x y size) (vec_rrr (VecALUOp.Zip1) x y size)) + +;; Helper for generating vector `abs` instructions. +(decl vec_abs (Reg VectorSize) Reg) +(rule (vec_abs x size) (vec_misc (VecMisc2.Abs) x size)) + +;; Helper for generating instruction sequences to calculate a scalar absolute +;; value. +(spec (abs s x) + (provide + (= result + (if (= s 32) + (conv_to 64 + (if (bvsge (extract 31 0 x) #x00000000) + (extract 31 0 x) + (bvneg (extract 31 0 x)))) + (if (bvsge x #x0000000000000000) x (bvneg x))))) + (require (or (= s 32) (= s 64)))) +(decl abs (OperandSize Reg) Reg) +(rule (abs size x) + (value_regs_get (with_flags (cmp_imm size x (u8_into_imm12 0)) + (csneg (Cond.Gt) x x)) 0)) + +;; Helper for generating `addv` instructions. 
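+;; As the spec below describes, `addv` sums every lane of the source vector
+;; into a single lane-wide scalar (modulo the lane width), which is then
+;; zero-extended to 64 bits.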
+(spec (addv x s) + (provide + (= result + (switch s + (#x00 (zero_ext 64 + (bvadd (extract 7 0 x) + (bvadd (extract 15 8 x) + (bvadd (extract 23 16 x) + (bvadd (extract 31 24 x) + (bvadd (extract 39 32 x) + (bvadd (extract 47 40 x) + (bvadd (extract 55 48 x) + (extract 63 56 x)))))))))) + (#x01 (zero_ext 64 + (bvadd (extract 15 0 x) + (bvadd (extract 31 16 x) + (bvadd (extract 47 32 x) + (extract 63 48 x)))))) + (#x02 (zero_ext 64 + (bvadd (extract 31 0 x) + (extract 63 32 x))))))) + (require (or (= s #x00) (or (= s #x01) (= s #x02))))) +(decl addv (Reg VectorSize) Reg) +(rule (addv x size) (vec_lanes (VecLanesOp.Addv) x size)) + +;; Helper for generating `shll32` instructions. +(decl shll32 (Reg bool) Reg) +(rule (shll32 x high_half) (vec_rr_long (VecRRLongOp.Shll32) x high_half)) + +;; Helpers for generating `addlp` instructions. + +(decl saddlp8 (Reg) Reg) +(rule (saddlp8 x) (vec_rr_pair_long (VecRRPairLongOp.Saddlp8) x)) + +(decl saddlp16 (Reg) Reg) +(rule (saddlp16 x) (vec_rr_pair_long (VecRRPairLongOp.Saddlp16) x)) + +(decl uaddlp8 (Reg) Reg) +(rule (uaddlp8 x) (vec_rr_pair_long (VecRRPairLongOp.Uaddlp8) x)) + +(decl uaddlp16 (Reg) Reg) +(rule (uaddlp16 x) (vec_rr_pair_long (VecRRPairLongOp.Uaddlp16) x)) + +;; Helper for generating `umlal32` instructions. +(decl umlal32 (Reg Reg Reg bool) Reg) +(rule (umlal32 x y z high_half) (vec_rrrr_long (VecRRRLongModOp.Umlal32) x y z high_half)) + +;; Helper for generating `smull8` instructions. +(decl smull8 (Reg Reg bool) Reg) +(rule (smull8 x y high_half) (vec_rrr_long (VecRRRLongOp.Smull8) x y high_half)) + +;; Helper for generating `umull8` instructions. +(decl umull8 (Reg Reg bool) Reg) +(rule (umull8 x y high_half) (vec_rrr_long (VecRRRLongOp.Umull8) x y high_half)) + +;; Helper for generating `smull16` instructions. +(decl smull16 (Reg Reg bool) Reg) +(rule (smull16 x y high_half) (vec_rrr_long (VecRRRLongOp.Smull16) x y high_half)) + +;; Helper for generating `umull16` instructions. +(decl umull16 (Reg Reg bool) Reg) +(rule (umull16 x y high_half) (vec_rrr_long (VecRRRLongOp.Umull16) x y high_half)) + +;; Helper for generating `smull32` instructions. +(decl smull32 (Reg Reg bool) Reg) +(rule (smull32 x y high_half) (vec_rrr_long (VecRRRLongOp.Smull32) x y high_half)) + +;; Helper for generating `umull32` instructions. +(decl umull32 (Reg Reg bool) Reg) +(rule (umull32 x y high_half) (vec_rrr_long (VecRRRLongOp.Umull32) x y high_half)) + +;; Helper for generating `asr` instructions. +(decl asr (Type Reg Reg) Reg) +(rule (asr ty x y) (alu_rrr (ALUOp.Asr) ty x y)) + +(decl asr_imm (Type Reg ImmShift) Reg) +(rule (asr_imm ty x imm) (alu_rr_imm_shift (ALUOp.Asr) ty x imm)) + +;; Helper for generating `lsr` instructions. +(spec (lsr ty a b) + (provide + (= result + (switch ty + (32 (conv_to 64 (bvlshr (extract 31 0 a) (extract 31 0 b)))) + (64 (bvlshr a b)))))) +(decl lsr (Type Reg Reg) Reg) +(rule (lsr ty x y) (alu_rrr (ALUOp.Lsr) ty x y)) + +(spec (lsr_imm ty a b) + (provide + (= result + (switch ty + (32 (conv_to 64 (bvlshr (extract 31 0 a) (zero_ext 32 b)))) + (64 (bvlshr a (zero_ext 64 b))))))) +(decl lsr_imm (Type Reg ImmShift) Reg) +(rule (lsr_imm ty x imm) (alu_rr_imm_shift (ALUOp.Lsr) ty x imm)) + +;; Helper for generating `lsl` instructions. 
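+;; `lsl` takes the shift amount in a register, while `lsl_imm` below takes it
+;; as an `ImmShift` immediate. As in the specs, 32-bit operations act only on
+;; the low 32 bits of the inputs; a typical (illustrative) use is
+;; `(lsl $I64 x y)` for a 64-bit shift-left.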
+(spec (lsl ty a b) + (provide + (= result + (switch ty + (32 (conv_to 64 (bvshl (extract 31 0 a) (extract 31 0 b)))) + (64 (bvshl a b)))))) +(decl lsl (Type Reg Reg) Reg) +(rule (lsl ty x y) (alu_rrr (ALUOp.Lsl) ty x y)) + +(spec (lsl_imm ty a b) + (provide + (= result + (switch ty + (32 (conv_to 64 (bvshl (extract 31 0 a) (zero_ext 32 b)))) + (64 (bvshl a (zero_ext 64 b))))))) +(decl lsl_imm (Type Reg ImmShift) Reg) +(rule (lsl_imm ty x imm) (alu_rr_imm_shift (ALUOp.Lsl) ty x imm)) + +;; Helper for generating `udiv` instructions. +(spec (a64_udiv ty a b) + (provide + (= result + (if (<= ty 32) + (conv_to 64 (bvudiv (extract 31 0 a) (extract 31 0 b))) + (bvudiv a b))))) +(decl a64_udiv (Type Reg Reg) Reg) +(rule (a64_udiv ty x y) (alu_rrr (ALUOp.UDiv) ty x y)) + +;; Helper for generating `sdiv` instructions. +(spec (a64_sdiv ty a b) + (provide + (= result + (if (<= ty 32) + (conv_to 64 (bvsdiv (extract 31 0 a) (extract 31 0 b))) + (bvsdiv a b))))) +(decl a64_sdiv (Type Reg Reg) Reg) +(rule (a64_sdiv ty x y) (alu_rrr (ALUOp.SDiv) ty x y)) + +;; Helper for generating `not` instructions. +(decl not (Reg VectorSize) Reg) +(rule (not x size) (vec_misc (VecMisc2.Not) x size)) + +;; Helpers for generating `orr_not` instructions. +(spec (orr_not ty a b) + (provide + (= result + (if (<= ty 32) + (conv_to 64 (bvor (extract 31 0 a) (bvnot (extract 31 0 b)))) + (bvor a (bvnot b)))))) +(decl orr_not (Type Reg Reg) Reg) +(rule (orr_not ty x y) (alu_rrr (ALUOp.OrrNot) ty x y)) + +(spec (orr_not_shift ty a b shift) + (provide + (= result (if (<= ty 32) + (conv_to 64 (bvor a (bvnot (bvshl b (zero_ext 64 (bvand (bvsub (int2bv 8 ty) #x01) (extract 7 0 shift))))))) + (bvor a (bvnot (bvshl b (zero_ext 64 (bvand (bvsub (int2bv 8 ty) #x01) (extract 7 0 shift)))))))))) +(decl orr_not_shift (Type Reg Reg ShiftOpAndAmt) Reg) +(rule (orr_not_shift ty x y shift) (alu_rrr_shift (ALUOp.OrrNot) ty x y shift)) + +;; Helpers for generating `orr` instructions. +(spec (orr ty a b) + (provide + (= result + (if (<= ty 32) + (conv_to 64 (bvor (extract 31 0 a) (extract 31 0 b))) + (bvor a b)))) + (require (or (= ty 8) (= ty 16) (= ty 32) (= ty 64)))) +(decl orr (Type Reg Reg) Reg) +(rule (orr ty x y) (alu_rrr (ALUOp.Orr) ty x y)) + +(spec (orr_imm ty x y) + (provide + (= result + (switch ty + (32 (conv_to 64 (bvor (extract 31 0 x) (extract 31 0 y)))) + (64 (bvor x (zero_ext 64 y)))))) + (require + (or + (= y (bvand y #x0000000000000fff)) + (= y (bvand y #x0000000000fff000))))) +(decl orr_imm (Type Reg ImmLogic) Reg) +(rule (orr_imm ty x y) (alu_rr_imm_logic (ALUOp.Orr) ty x y)) + +(decl orr_shift (Type Reg Reg ShiftOpAndAmt) Reg) +(rule (orr_shift ty x y shift) (alu_rrr_shift (ALUOp.Orr) ty x y shift)) + +(decl orr_vec (Reg Reg VectorSize) Reg) +(rule (orr_vec x y size) (vec_rrr (VecALUOp.Orr) x y size)) + +;; Helpers for generating `and` instructions. 
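+;; `and_reg` takes both operands in registers; `and_imm` requires the constant
+;; to already be encoded as an AArch64 logical immediate (`ImmLogic`), e.g.
+;; obtained via `imm_logic_from_imm64` as in the `alu_rs_imm_logic` rules
+;; further below.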
+(spec (and_reg ty a b) + (provide + (= result + (if (<= ty 32) + (conv_to 64 (bvand (extract 31 0 a) (extract 31 0 b))) + (bvand a b)))) + (require (or (= ty 8) (= ty 16) (= ty 32) (= ty 64)))) +(decl and_reg (Type Reg Reg) Reg) +(rule (and_reg ty x y) (alu_rrr (ALUOp.And) ty x y)) + +(spec (and_imm ty x y) + (provide + (= result + (switch ty + (32 (conv_to 64 (bvand (extract 31 0 x) (extract 31 0 y)))) + (64 (bvand x (zero_ext 64 y)))))) + (require + (or + (= y (bvand y #x0000000000000fff)) + (= y (bvand y #x0000000000fff000))))) +(decl and_imm (Type Reg ImmLogic) Reg) +(rule (and_imm ty x y) (alu_rr_imm_logic (ALUOp.And) ty x y)) + +(decl and_vec (Reg Reg VectorSize) Reg) +(rule (and_vec x y size) (vec_rrr (VecALUOp.And) x y size)) + +;; Helpers for generating `eor` instructions. +(decl eor (Type Reg Reg) Reg) +(rule (eor ty x y) (alu_rrr (ALUOp.Eor) ty x y)) + +(decl eor_vec (Reg Reg VectorSize) Reg) +(rule (eor_vec x y size) (vec_rrr (VecALUOp.Eor) x y size)) + +;; Helpers for generating `bic` instructions. +(spec (bic ty a b) + (provide + (= result + (if (<= ty 32) + (conv_to 64 (bvand (extract 31 0 a) (bvnot (extract 31 0 b)))) + (bvand a (bvnot b)) + ) + )) + (require (or (= ty 8) (= ty 16) (= ty 32) (= ty 64)))) +(decl bic (Type Reg Reg) Reg) +(rule (bic ty x y) (alu_rrr (ALUOp.AndNot) ty x y)) + +(decl bic_vec (Reg Reg VectorSize) Reg) +(rule (bic_vec x y size) (vec_rrr (VecALUOp.Bic) x y size)) + +;; Helpers for generating `sshl` instructions. +(decl sshl (Reg Reg VectorSize) Reg) +(rule (sshl x y size) (vec_rrr (VecALUOp.Sshl) x y size)) + +;; Helpers for generating `ushl` instructions. +(decl ushl (Reg Reg VectorSize) Reg) +(rule (ushl x y size) (vec_rrr (VecALUOp.Ushl) x y size)) + +;; Helpers for generating `ushl` instructions. +(decl ushl_vec_imm (Reg u8 VectorSize) Reg) +(rule (ushl_vec_imm x amt size) (vec_shift_imm (VecShiftImmOp.Shl) amt x size)) + +;; Helpers for generating `ushr` instructions. +(decl ushr_vec_imm (Reg u8 VectorSize) Reg) +(rule (ushr_vec_imm x amt size) (vec_shift_imm (VecShiftImmOp.Ushr) amt x size)) + +;; Helpers for generating `sshr` instructions. +(decl sshr_vec_imm (Reg u8 VectorSize) Reg) +(rule (sshr_vec_imm x amt size) (vec_shift_imm (VecShiftImmOp.Sshr) amt x size)) + +;; Helpers for generating `rotr` instructions. +;; +;; Note that the `Extr` opcode is used here as `rotr` is an alias for that +;; instruction where two operands are the same register. +(spec (a64_rotr ty x y) + (provide + (= result + (if (= ty 32) + (zero_ext 64 (rotr (extract 31 0 x) (extract 31 0 y))) + (rotr x y)))) + (require (or (= ty 32) (= ty 64)))) +(decl a64_rotr (Type Reg Reg) Reg) +(rule (a64_rotr ty x y) (alu_rrr (ALUOp.Extr) ty x y)) + +(spec (a64_rotr_imm ty x y) + (provide + (= result + (if (= ty 32) + (zero_ext 64 (rotr (extract 31 0 x) (zero_ext 32 y))) + (rotr x (zero_ext 64 y))))) + (require (or (= ty 32) (= ty 64)))) +(decl a64_rotr_imm (Type Reg ImmShift) Reg) +(rule (a64_rotr_imm ty x y) (alu_rr_imm_shift (ALUOp.Extr) ty x y)) + +;; Helpers for generating `extr` instructions +(decl a64_extr (Type Reg Reg ImmShift) Reg) +(rule (a64_extr ty x y shift) (alu_rrr_shift (ALUOp.Extr) ty x y (a64_extr_imm ty shift))) +(decl a64_extr_imm (Type ImmShift) ShiftOpAndAmt) +(extern constructor a64_extr_imm a64_extr_imm) + +;; Helpers for generating `rbit` instructions. 
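+;; `rbit` reverses the order of the bits in the operand, so bit 0 of the
+;; source becomes the most significant bit of the result (for a 32-bit
+;; operation, 0x1 becomes 0x8000_0000).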
+(spec (rbit ty a) + (provide + (= result + (if (= ty 32) + (conv_to 64 (rev (extract 31 0 a))) + (rev a)))) + (require (or (= ty 32) (= ty 64)))) +(decl rbit (Type Reg) Reg) +(rule (rbit ty x) (bit_rr (BitOp.RBit) ty x)) + +;; Helpers for generating `clz` instructions. +(spec (a64_clz ty a) + (provide + (= result + (if (= ty 32) + (conv_to 64 (clz (extract 31 0 a))) + (clz a)))) + (require (or (= ty 32) (= ty 64)))) +(decl a64_clz (Type Reg) Reg) +(rule (a64_clz ty x) (bit_rr (BitOp.Clz) ty x)) + +;; Helpers for generating `cls` instructions. +(spec (a64_cls ty a) + (provide + (= result + (if (= ty 32) + (conv_to 64 (cls (extract 31 0 a))) + (cls a)))) + (require (or (= ty 32) (= ty 64)))) +(decl a64_cls (Type Reg) Reg) +(rule (a64_cls ty x) (bit_rr (BitOp.Cls) ty x)) + +;; Helpers for generating `rev` instructions + +(decl a64_rev16 (Type Reg) Reg) +(rule (a64_rev16 ty x) (bit_rr (BitOp.Rev16) ty x)) + +(decl a64_rev32 (Type Reg) Reg) +(rule (a64_rev32 ty x) (bit_rr (BitOp.Rev32) ty x)) + +(decl a64_rev64 (Type Reg) Reg) +(rule (a64_rev64 ty x) (bit_rr (BitOp.Rev64) ty x)) + +;; Helpers for generating `eon` instructions. + +(decl eon (Type Reg Reg) Reg) +(rule (eon ty x y) (alu_rrr (ALUOp.EorNot) ty x y)) + +;; Helpers for generating `cnt` instructions. +(spec (vec_cnt x s) + (provide + (= result + (switch s + ((VectorSize.Size8x8) + (concat + (popcnt (extract 63 56 x)) + (popcnt (extract 55 48 x)) + (popcnt (extract 47 40 x)) + (popcnt (extract 39 32 x)) + (popcnt (extract 31 24 x)) + (popcnt (extract 23 16 x)) + (popcnt (extract 15 8 x)) + (popcnt (extract 7 0 x)))) + ((VectorSize.Size16x4) result) + ((VectorSize.Size32x2) result)))) + (require + (or (= s (VectorSize.Size8x8)) (= s (VectorSize.Size16x4)) (= s (VectorSize.Size32x2))))) +(decl vec_cnt (Reg VectorSize) Reg) +(rule (vec_cnt x size) (vec_misc (VecMisc2.Cnt) x size)) + +;; Helpers for generating a `bsl` instruction. + +(decl bsl (Type Reg Reg Reg) Reg) +(rule (bsl ty c x y) + (vec_rrr_mod (VecALUModOp.Bsl) c x y (vector_size ty))) + +;; Helper for generating a `udf` instruction. + +(decl udf (TrapCode) SideEffectNoResult) +(rule (udf trap_code) + (SideEffectNoResult.Inst (MInst.Udf trap_code))) + +;; Helpers for generating various load instructions, with varying +;; widths and sign/zero-extending properties. 
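+;; Each of these allocates a fresh temporary destination register of the
+;; appropriate class via `temp_writable_reg`, emits the corresponding `MInst`
+;; load into it, and returns that register; the `spec` annotations on the
+;; unsigned variants describe their zero-extension behaviour.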
+(decl aarch64_uload8 (AMode MemFlags) Reg) +(spec (aarch64_uload8 amode flags) + (provide (= result (zero_ext 32 (load_effect flags 8 amode)))) + (require (= 32 (widthof result)))) +(rule (aarch64_uload8 amode flags) + (let ((dst WritableReg (temp_writable_reg $I64)) + (_ Unit (emit (MInst.ULoad8 dst amode flags)))) + dst)) +(decl aarch64_sload8 (AMode MemFlags) Reg) +(rule (aarch64_sload8 amode flags) + (let ((dst WritableReg (temp_writable_reg $I64)) + (_ Unit (emit (MInst.SLoad8 dst amode flags)))) + dst)) +(decl aarch64_uload16 (AMode MemFlags) Reg) +(spec (aarch64_uload16 amode flags) + (provide (= result (zero_ext 32 (load_effect flags 16 amode)))) + (require (= 32 (widthof result)))) +(rule (aarch64_uload16 amode flags) + (let ((dst WritableReg (temp_writable_reg $I64)) + (_ Unit (emit (MInst.ULoad16 dst amode flags)))) + dst)) +(decl aarch64_sload16 (AMode MemFlags) Reg) +(rule (aarch64_sload16 amode flags) + (let ((dst WritableReg (temp_writable_reg $I64)) + (_ Unit (emit (MInst.SLoad16 dst amode flags)))) + dst)) +(decl aarch64_uload32 (AMode MemFlags) Reg) +(spec (aarch64_uload32 amode flags) + (provide (= result (load_effect flags 32 amode))) + (require (= 32 (widthof result)))) +(rule (aarch64_uload32 amode flags) + (let ((dst WritableReg (temp_writable_reg $I64)) + (_ Unit (emit (MInst.ULoad32 dst amode flags)))) + dst)) +(decl aarch64_sload32 (AMode MemFlags) Reg) +(rule (aarch64_sload32 amode flags) + (let ((dst WritableReg (temp_writable_reg $I64)) + (_ Unit (emit (MInst.SLoad32 dst amode flags)))) + dst)) +(decl aarch64_uload64 (AMode MemFlags) Reg) +(spec (aarch64_uload64 amode flags) + (provide (= result (load_effect flags 64 amode))) + (require (= 64 (widthof result)))) +(rule (aarch64_uload64 amode flags) + (let ((dst WritableReg (temp_writable_reg $I64)) + (_ Unit (emit (MInst.ULoad64 dst amode flags)))) + dst)) +(decl aarch64_fpuload16 (AMode MemFlags) Reg) +(rule (aarch64_fpuload16 amode flags) + (let ((dst WritableReg (temp_writable_reg $F64)) + (_ Unit (emit (MInst.FpuLoad16 dst amode flags)))) + dst)) +(decl aarch64_fpuload32 (AMode MemFlags) Reg) +(rule (aarch64_fpuload32 amode flags) + (let ((dst WritableReg (temp_writable_reg $F64)) + (_ Unit (emit (MInst.FpuLoad32 dst amode flags)))) + dst)) +(decl aarch64_fpuload64 (AMode MemFlags) Reg) +(rule (aarch64_fpuload64 amode flags) + (let ((dst WritableReg (temp_writable_reg $F64)) + (_ Unit (emit (MInst.FpuLoad64 dst amode flags)))) + dst)) +(decl aarch64_fpuload128 (AMode MemFlags) Reg) +(rule (aarch64_fpuload128 amode flags) + (let ((dst WritableReg (temp_writable_reg $F64X2)) + (_ Unit (emit (MInst.FpuLoad128 dst amode flags)))) + dst)) +(decl aarch64_loadp64 (PairAMode MemFlags) ValueRegs) +(rule (aarch64_loadp64 amode flags) + (let ((dst1 WritableReg (temp_writable_reg $I64)) + (dst2 WritableReg (temp_writable_reg $I64)) + (_ Unit (emit (MInst.LoadP64 dst1 dst2 amode flags)))) + (value_regs dst1 dst2))) + +;; Helpers for generating various store instructions with varying +;; widths. 
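+;; Unlike the load helpers above, these return a `SideEffectNoResult` wrapping
+;; the store instruction rather than a register, so callers thread them
+;; through the usual side-effect helpers (e.g. `side_effect`).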
+(decl aarch64_store8 (AMode MemFlags Reg) SideEffectNoResult) +(spec (aarch64_store8 amode flags val) + (provide (= result (store_effect flags 8 (extract 7 0 val) amode)))) +(rule (aarch64_store8 amode flags val) + (SideEffectNoResult.Inst (MInst.Store8 val amode flags))) +(decl aarch64_store16 (AMode MemFlags Reg) SideEffectNoResult) +(spec (aarch64_store16 amode flags val) + (provide (= result (store_effect flags 16 (extract 15 0 val) amode)))) +(rule (aarch64_store16 amode flags val) + (SideEffectNoResult.Inst (MInst.Store16 val amode flags))) +(decl aarch64_store32 (AMode MemFlags Reg) SideEffectNoResult) +(spec (aarch64_store32 amode flags val) + (provide (= result (store_effect flags 32 (extract 31 0 val) amode)))) +(rule (aarch64_store32 amode flags val) + (SideEffectNoResult.Inst (MInst.Store32 val amode flags))) +(decl aarch64_store64 (AMode MemFlags Reg) SideEffectNoResult) +(spec (aarch64_store64 amode flags val) + (provide (= result (store_effect flags 64 val amode)))) +(rule (aarch64_store64 amode flags val) + (SideEffectNoResult.Inst (MInst.Store64 val amode flags))) +(decl aarch64_fpustore16 (AMode MemFlags Reg) SideEffectNoResult) +(rule (aarch64_fpustore16 amode flags val) + (SideEffectNoResult.Inst (MInst.FpuStore16 val amode flags))) +(decl aarch64_fpustore32 (AMode MemFlags Reg) SideEffectNoResult) +(rule (aarch64_fpustore32 amode flags val) + (SideEffectNoResult.Inst (MInst.FpuStore32 val amode flags))) +(decl aarch64_fpustore64 (AMode MemFlags Reg) SideEffectNoResult) +(rule (aarch64_fpustore64 amode flags val) + (SideEffectNoResult.Inst (MInst.FpuStore64 val amode flags))) +(decl aarch64_fpustore128 (AMode MemFlags Reg) SideEffectNoResult) +(rule (aarch64_fpustore128 amode flags val) + (SideEffectNoResult.Inst (MInst.FpuStore128 val amode flags))) +(decl aarch64_storep64 (PairAMode MemFlags Reg Reg) SideEffectNoResult) +(rule (aarch64_storep64 amode flags val1 val2) + (SideEffectNoResult.Inst (MInst.StoreP64 val1 val2 amode flags))) + +;; Helper for generating a `trapif` instruction. + +(decl trap_if (ProducesFlags TrapCode Cond) InstOutput) +(rule (trap_if flags trap_code cond) + (side_effect + (with_flags_side_effect flags + (ConsumesFlags.ConsumesFlagsSideEffect + (MInst.TrapIf (cond_br_cond cond) trap_code))))) + +;; Helpers for lowering `trapz` and `trapnz`. 
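+;; `ZeroCond.Zero` corresponds to `trapz` (trap when the value is zero) and
+;; `ZeroCond.NonZero` to `trapnz`. The `$I128` rule below ORs the two halves
+;; together first so that the zero/non-zero test covers the full 128 bits.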
+(type ZeroCond + (enum + Zero + NonZero)) + +(decl zero_cond_to_cond_br (ZeroCond Reg OperandSize) CondBrKind) +(rule (zero_cond_to_cond_br (ZeroCond.Zero) reg size) + (cond_br_zero reg size)) + +(rule (zero_cond_to_cond_br (ZeroCond.NonZero) reg size) + (cond_br_not_zero reg size)) + +(decl trap_if_val (ZeroCond Value TrapCode) InstOutput) +(rule (trap_if_val zero_cond val @ (value_type (fits_in_64 _)) trap_code) + (let ((reg Reg (put_in_reg_zext64 val))) + (side_effect + (SideEffectNoResult.Inst + (MInst.TrapIf (zero_cond_to_cond_br zero_cond reg (operand_size $I64)) trap_code))))) + +(rule -1 (trap_if_val zero_cond val @ (value_type $I128) trap_code) + (let ((c ValueRegs (put_in_regs val)) + (c_lo Reg (value_regs_get c 0)) + (c_hi Reg (value_regs_get c 1)) + (c_test Reg (orr $I64 c_lo c_hi))) + (side_effect + (SideEffectNoResult.Inst + (MInst.TrapIf (zero_cond_to_cond_br zero_cond c_test (operand_size $I64)) trap_code))))) + +;; Immediate value helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Type of extension performed by an immediate helper +(model ImmExtend + (enum + (Sign #b0) + (Zero #b1))) +(type ImmExtend + (enum + (Sign) + (Zero))) + +;; Arguments: +;; * Immediate type +;; * Way to extend the immediate value to the full width of the destination +;; register +;; * Immediate value - only the bits that fit within the type are used and +;; extended, while the rest are ignored +;; +;; Note that, unlike the convention in the AArch64 backend, this helper leaves +;; all bits in the destination register in a defined state, i.e. smaller types +;; such as `I8` are either sign- or zero-extended. +(spec (imm ty ext x) + (provide + (= result + (switch ty + (8 (if (= ext #b1) (zero_ext 64 (extract 7 0 x)) (sign_ext 64 (extract 7 0 x)))) + (16 (if (= ext #b1) (zero_ext 64 (extract 15 0 x)) (sign_ext 64 (extract 15 0 x)))) + (32 (if (= ext #b1) (zero_ext 64 (extract 32 0 x)) (sign_ext 64 (extract 32 0 x)))) + (64 x)))) + (require (or (= ty 8) (= ty 16) (= ty 32) (= ty 64)))) +(instantiate imm + ((args Int (bv 64)) (ret (bv 64)) (canon (bv 8))) + ((args Int (bv 64)) (ret (bv 64)) (canon (bv 16))) + ((args Int (bv 64)) (ret (bv 64)) (canon (bv 32))) + ((args Int (bv 64)) (ret (bv 64)) (canon (bv 64))) +) +(decl imm (Type ImmExtend u64) Reg) + +;; Move wide immediate instructions; to simplify, we only match when we +;; are zero-extending the value. +(rule 3 (imm (integral_ty ty) (ImmExtend.Zero) k) + (if-let n (move_wide_const_from_u64 ty k)) + (add_range_fact + (movz n (operand_size ty)) + 64 k k)) +(rule 2 (imm (integral_ty (ty_32_or_64 ty)) (ImmExtend.Zero) k) + (if-let n (move_wide_const_from_inverted_u64 ty k)) + (add_range_fact + (movn n (operand_size ty)) + 64 k k)) + +;; Weird logical-instruction immediate in ORI using zero register; to simplify, +;; we only match when we are zero-extending the value. 
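+;; In other words, when the constant happens to be encodable as a logical
+;; immediate it is materialized with a single `orr` against the zero register,
+;; which is what the rule below does via `orr_imm` and `zero_reg`.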
+(rule 1 (imm (integral_ty ty) (ImmExtend.Zero) k) + (if-let n (imm_logic_from_u64 ty k)) + (if-let m (imm_size_from_type ty)) + (add_range_fact + (orr_imm ty (zero_reg) n) + m k k)) + +(decl load_constant_full (Type ImmExtend OperandSize u64) Reg) +(extern constructor load_constant_full load_constant_full) + +;; Fallback for integral 32-bit constants +(rule (imm (fits_in_32 (integral_ty ty)) extend n) + (load_constant_full ty extend (operand_size $I32) n)) + +;; Fallback for integral 64-bit constants +(rule -1 (imm (integral_ty $I64) extend n) + (load_constant_full $I64 extend (operand_size $I64) n)) + + +;; Sign extension helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Place a `Value` into a register, sign extending it to 32-bits +(spec (put_in_reg_sext32 arg) + (provide + (= result + (if (<= (widthof arg) 32) + (conv_to 64 (sign_ext 32 arg)) + (conv_to 64 arg))))) +(decl put_in_reg_sext32 (Value) Reg) +(rule -1 (put_in_reg_sext32 val @ (value_type (fits_in_32 ty))) + (extend val true (ty_bits ty) 32)) + +;; 32/64-bit passthrough. +(rule (put_in_reg_sext32 val @ (value_type $I32)) val) +(rule (put_in_reg_sext32 val @ (value_type $I64)) val) + +;; Place a `Value` into a register, zero extending it to 32-bits +(spec (put_in_reg_zext32 arg) + (provide + (= result + (if (<= (widthof arg) 32) + (conv_to 64 (zero_ext 32 arg)) + (conv_to 64 arg))))) +(decl put_in_reg_zext32 (Value) Reg) +(rule -1 (put_in_reg_zext32 val @ (value_type (fits_in_32 ty))) + (extend val false (ty_bits ty) 32)) + +;; 32/64-bit passthrough. +(rule (put_in_reg_zext32 val @ (value_type $I32)) val) +(rule (put_in_reg_zext32 val @ (value_type $I64)) val) + +;; Place a `Value` into a register, sign extending it to 64-bits +(spec (put_in_reg_sext64 x) + (provide (= (sign_ext 64 x) result))) +(decl put_in_reg_sext64 (Value) Reg) +(rule 1 (put_in_reg_sext64 val @ (value_type (fits_in_32 ty))) + (extend val true (ty_bits ty) 64)) + +;; 64-bit passthrough. +(rule (put_in_reg_sext64 val @ (value_type $I64)) val) + +;; Place a `Value` into a register, zero extending it to 64-bits +(spec (put_in_reg_zext64 x) + (provide (= result (zero_ext 64 x)))) +(decl put_in_reg_zext64 (Value) Reg) +(rule 1 (put_in_reg_zext64 val @ (value_type (fits_in_32 ty))) + (extend val false (ty_bits ty) 64)) + +;; 64-bit passthrough. +(rule (put_in_reg_zext64 val @ (value_type $I64)) val) + +;; Misc instruction helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(decl trap_if_zero_divisor (Reg OperandSize) Reg) +(rule (trap_if_zero_divisor reg size) + (let ((_ Unit (emit (MInst.TrapIf (cond_br_zero reg size ) (trap_code_division_by_zero))))) + reg)) + +(decl size_from_ty (Type) OperandSize) +(rule 1 (size_from_ty (fits_in_32 _ty)) (OperandSize.Size32)) +(rule (size_from_ty $I64) (OperandSize.Size64)) + +;; Check for signed overflow. The only case is min_value / -1. +;; The following checks must be done in 32-bit or 64-bit, depending +;; on the input type. For 8- and 16- bit, the check for x == min_value +;; must use a possibly-shifted value, xcheck, to overflow as expected. +(decl trap_if_div_overflow (Type Reg Reg Reg) Reg) +(rule (trap_if_div_overflow ty xcheck x y) + (let ( + ;; Check RHS is -1. + (_ Unit (emit (MInst.AluRRImm12 (ALUOp.AddS) (operand_size ty) (writable_zero_reg) y (u8_into_imm12 1)))) + + ;; Check LHS is min_value, by subtracting 1 from the possibly-shifted + ;; value and branching if there is overflow. 
+ (_ Unit (emit (MInst.CCmpImm (size_from_ty ty) + xcheck + (u8_into_uimm5 1) + (nzcv false false false false) + (Cond.Eq)))) + (_ Unit (emit (MInst.TrapIf (cond_br_cond (Cond.Vs)) + (trap_code_integer_overflow)))) + ) + x)) + +;; In the cases narrower than a register width, subtracting 1 from the +;; min_value will not cause overflow (e.g., I8's min_value of -128 stored in +;; a 32-bit register produces -129 with no overflow). However, if we left shift +;; x by (32 - ty), we then produce the 32-bit min_value for the respective min +;; values of I8 and I16. +;; E.g., I8's 0x00000080 left-shifted by 24 is 0x80000000, which overflows. +(decl intmin_check (Type Reg) Reg) +(rule intmin_check_fits_in_16 (intmin_check (fits_in_16 ty) x) + (alu_rr_imm_shift (ALUOp.Lsl) ty x (imm_shift_from_u8 (diff_from_32 ty)))) + +;; In the I32 or I64 case, checking x itself against the min_value is fine. +(rule -1 (intmin_check ty x) x) + +;; Check for unsigned overflow. +(decl trap_if_overflow (ProducesFlags TrapCode) Reg) +(rule (trap_if_overflow producer tc) + (with_flags_reg + producer + (ConsumesFlags.ConsumesFlagsSideEffect + (MInst.TrapIf (cond_br_cond (Cond.Hs)) tc)))) + +(decl sink_atomic_load (Inst) Reg) +(rule (sink_atomic_load x @ (atomic_load _ addr)) + (let ((_ Unit (sink_inst x))) + (put_in_reg addr))) + +;; Helper for generating either an `AluRRR`, `AluRRRShift`, or `AluRRImmLogic` +;; instruction depending on the input. Note that this requires that the `ALUOp` +;; specified is commutative. +(spec (alu_rs_imm_logic_commutative op t a b) + (provide + (= result + (conv_to 64 + (switch op + ((ALUOp.Orr) (bvor a b)) + ((ALUOp.And) (bvand a b)) + ((ALUOp.Eor) (bvxor a b))))))) +(decl alu_rs_imm_logic_commutative (ALUOp Type Value Value) Reg) + +;; Base case of operating on registers. +(rule -1 (alu_rs_imm_logic_commutative op ty x y) + (alu_rrr op ty x y)) + +;; Special cases for when one operand is a constant. +(rule (alu_rs_imm_logic_commutative op ty x (iconst k)) + (if-let imm (imm_logic_from_imm64 ty k)) + (alu_rr_imm_logic op ty x imm)) +(rule 1 (alu_rs_imm_logic_commutative op ty (iconst k) x) + (if-let imm (imm_logic_from_imm64 ty k)) + (alu_rr_imm_logic op ty x imm)) + +;; Special cases for when one operand is shifted left by a constant. +(rule (alu_rs_imm_logic_commutative op ty x (ishl y (iconst k))) + (if-let amt (lshl_from_imm64 ty k)) + (alu_rrr_shift op ty x y amt)) +(rule 1 (alu_rs_imm_logic_commutative op ty (ishl x (iconst k)) y) + (if-let amt (lshl_from_imm64 ty k)) + (alu_rrr_shift op ty y x amt)) + +;; Same as `alu_rs_imm_logic_commutative` above, except that it doesn't require +;; that the operation is commutative. +(spec (alu_rs_imm_logic op t a b) + (provide + (= result + (conv_to 64 + (switch op + ((ALUOp.OrrNot) (bvor a (bvnot b))) + ((ALUOp.EorNot) (bvxor a (bvnot b))) + ((ALUOp.AndNot) (bvand a (bvnot b)))))))) +(decl alu_rs_imm_logic (ALUOp Type Value Value) Reg) +(rule -1 (alu_rs_imm_logic op ty x y) + (alu_rrr op ty x y)) +(rule (alu_rs_imm_logic op ty x (iconst k)) + (if-let imm (imm_logic_from_imm64 ty k)) + (alu_rr_imm_logic op ty x imm)) +(rule (alu_rs_imm_logic op ty x (ishl y (iconst k))) + (if-let amt (lshl_from_imm64 ty k)) + (alu_rrr_shift op ty x y amt)) + +;; Helper for generating i128 bitops which simply do the same operation to the +;; hi/lo registers. 
+;; +;; TODO: Support immlogic here +(decl i128_alu_bitop (ALUOp Type Value Value) ValueRegs) +(rule (i128_alu_bitop op ty x y) + (let ( + (x_regs ValueRegs (put_in_regs x)) + (x_lo Reg (value_regs_get x_regs 0)) + (x_hi Reg (value_regs_get x_regs 1)) + (y_regs ValueRegs (put_in_regs y)) + (y_lo Reg (value_regs_get y_regs 0)) + (y_hi Reg (value_regs_get y_regs 1)) + ) + (value_regs + (alu_rrr op ty x_lo y_lo) + (alu_rrr op ty x_hi y_hi)))) + +;; Helper for emitting `MInst.VecLoadReplicate` instructions. +(decl ld1r (Reg VectorSize MemFlags) Reg) +(rule (ld1r src size flags) + (let ((dst WritableReg (temp_writable_reg $I8X16)) + (_ Unit (emit (MInst.VecLoadReplicate dst src size flags)))) + dst)) + +;; Helper for emitting `MInst.LoadExtName` instructions. +(decl load_ext_name (BoxExternalName i64) Reg) +(rule (load_ext_name extname offset) + (let ((dst WritableReg (temp_writable_reg $I64)) + (_ Unit (emit (MInst.LoadExtName dst extname offset)))) + dst)) + +;; Lower the address of a load or a store. +;; +;; This will create an `AMode` representing the address of the `Value` provided +;; at runtime plus the immediate offset `i32` provided. The `Type` here is used +;; to represent the size of the value being loaded or stored for offset scaling +;; if necessary. +;; +;; Note that this is broken up into two phases. In the first phase this attempts +;; to find constants within the `val` provided and fold them in to the `offset` +;; provided. Afterwards though the `amode_no_more_iconst` helper is used at +;; which pointer constants are no longer pattern-matched and instead only +;; various modes are generated. This in theory would not be necessary with +;; mid-end optimizations that fold constants into load/store immediate offsets +;; instead, but for now each backend needs to do this. +(decl amode (Type Value i32) AMode) +(spec (amode ty val offset) + (provide (= result (bvadd val (sign_ext 64 offset)))) + (require (= 64 (widthof val)))) + +(rule 0 (amode ty val offset) + (amode_no_more_iconst ty val offset)) +(rule 1 (amode ty (iadd x (i32_from_iconst y)) offset) + (if-let new_offset (s32_add_fallible y offset)) + (amode_no_more_iconst ty x new_offset)) +(rule 2 (amode ty (iadd (i32_from_iconst x) y) offset) + (if-let new_offset (s32_add_fallible x offset)) + (amode_no_more_iconst ty y new_offset)) + +(decl amode_no_more_iconst (Type Value i32) AMode) +;; Base case: move the `offset` into a register and add it to `val` via the +;; amode +(rule 0 (amode_no_more_iconst ty val offset) + (AMode.RegReg val (imm $I64 (ImmExtend.Zero) (i64_as_u64 offset)))) + +;; Optimize cases where the `offset` provided fits into a immediates of +;; various kinds of addressing modes. +(rule 1 (amode_no_more_iconst ty val offset) + (if-let simm9 (simm9_from_i64 offset)) + (AMode.Unscaled val simm9)) +(rule 2 (amode_no_more_iconst ty val offset) + (if-let uimm12 (uimm12_scaled_from_i64 offset ty)) + (AMode.UnsignedOffset val uimm12)) + +;; Optimizations where addition can fold some operations into the `amode`. +;; +;; Note that here these take higher priority than constants because an +;; add-of-extend can be folded into an amode, representing 2 otherwise emitted +;; instructions. Constants on the other hand added to the amode represent only +;; a single instruction folded in, so fewer instructions should be generated +;; with these higher priority than the rules above. 
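+;; For example, `(iadd base (uextend index))` is matched by the
+;; `AMode.RegExtended` rules below, folding both the zero-extension and the
+;; addition into the addressing mode instead of emitting them separately.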
+(rule 3 (amode_no_more_iconst ty (iadd x y) offset) + (AMode.RegReg (amode_add x offset) y)) +(rule 4 (amode_no_more_iconst ty (iadd x (uextend y @ (value_type $I32))) offset) + (AMode.RegExtended (amode_add x offset) y (ExtendOp.UXTW))) +(rule 4 (amode_no_more_iconst ty (iadd x (sextend y @ (value_type $I32))) offset) + (AMode.RegExtended (amode_add x offset) y (ExtendOp.SXTW))) +(rule 5 (amode_no_more_iconst ty (iadd (uextend x @ (value_type $I32)) y) offset) + (AMode.RegExtended (amode_add y offset) x (ExtendOp.UXTW))) +(rule 5 (amode_no_more_iconst ty (iadd (sextend x @ (value_type $I32)) y) offset) + (AMode.RegExtended (amode_add y offset) x (ExtendOp.SXTW))) + +;; `RegScaled*` rules where this matches an addition of an "index register" to a +;; base register. The index register is shifted by the size of the type loaded +;; in bytes to enable this mode matching. +;; +;; Note that this can additionally bundle an extending operation but the +;; extension must happen before the shift. This will pattern-match the shift +;; first and then if that succeeds afterwards try to find an extend. +(rule 6 (amode_no_more_iconst ty (iadd x (ishl y (iconst (u64_from_imm64 n)))) offset) + (if-let true (u64_eq (ty_bytes ty) (u64_shl 1 (shift_masked_imm ty n)))) + (amode_reg_scaled (amode_add x offset) y)) +(rule 7 (amode_no_more_iconst ty (iadd (ishl y (iconst (u64_from_imm64 n))) x) offset) + (if-let true (u64_eq (ty_bytes ty) (u64_shl 1 (shift_masked_imm ty n)))) + (amode_reg_scaled (amode_add x offset) y)) + +(decl amode_reg_scaled (Reg Value) AMode) +(rule 0 (amode_reg_scaled base index) + (AMode.RegScaled base index)) +(rule 1 (amode_reg_scaled base (uextend index @ (value_type $I32))) + (AMode.RegScaledExtended base index (ExtendOp.UXTW))) +(rule 1 (amode_reg_scaled base (sextend index @ (value_type $I32))) + (AMode.RegScaledExtended base index (ExtendOp.SXTW))) + +;; Helper to add a 32-bit signed immediate to the register provided. This will +;; select an appropriate `add` instruction to use. +(decl amode_add (Reg i32) Reg) +(rule 0 (amode_add x y) + (add $I64 x (imm $I64 (ImmExtend.Zero) (i64_as_u64 y)))) +(rule 1 (amode_add x y) + (if-let (imm12_from_u64 imm12) (i64_as_u64 y)) + (add_imm $I64 x imm12)) +(rule 2 (amode_add x 0) x) + +;; Creates a `PairAMode` for the `Value` provided plus the `i32` constant +;; offset provided. +(decl pair_amode (Value i32) PairAMode) + +;; Base case where `val` and `offset` are combined with an `add` +(rule 0 (pair_amode val offset) + (if-let simm7 (simm7_scaled_from_i64 0 $I64)) + (PairAMode.SignedOffset (amode_add val offset) simm7)) + +;; Optimization when `offset` can fit into a `SImm7Scaled`. 
+(rule 1 (pair_amode val offset) + (if-let simm7 (simm7_scaled_from_i64 offset $I64)) + (PairAMode.SignedOffset val simm7)) + +(decl pure partial simm7_scaled_from_i64 (i64 Type) SImm7Scaled) +(extern constructor simm7_scaled_from_i64 simm7_scaled_from_i64) + +(decl pure partial uimm12_scaled_from_i64 (i64 Type) UImm12Scaled) +(extern constructor uimm12_scaled_from_i64 uimm12_scaled_from_i64) + +(decl pure partial simm9_from_i64 (i64) SImm9) +(extern constructor simm9_from_i64 simm9_from_i64) + + +(decl sink_load_into_addr (Type Inst) Reg) +(rule (sink_load_into_addr ty x @ (load _ addr (offset32 offset))) + (let ((_ Unit (sink_inst x))) + (add_imm_to_addr addr (i64_as_u64 offset)))) + +(decl add_imm_to_addr (Reg u64) Reg) +(rule 2 (add_imm_to_addr val 0) val) +(rule 1 (add_imm_to_addr val (imm12_from_u64 imm)) (add_imm $I64 val imm)) +(rule 0 (add_imm_to_addr val offset) (add $I64 val (imm $I64 (ImmExtend.Zero) offset))) + +;; Lower a constant f16. +;; +;; Note that we must make sure that all bits outside the lowest 16 are set to 0 +;; because this function is also used to load wider constants (that have zeros +;; in their most significant bits). +(decl constant_f16 (u16) Reg) +(rule 3 (constant_f16 n) + (if-let false (use_fp16)) + (constant_f32 n)) +(rule 2 (constant_f16 0) + (vec_dup_imm (asimd_mov_mod_imm_zero (ScalarSize.Size32)) + false + (VectorSize.Size32x2))) +(rule 1 (constant_f16 n) + (if-let imm (asimd_fp_mod_imm_from_u64 n (ScalarSize.Size16))) + (fpu_move_fp_imm imm (ScalarSize.Size16))) +(rule (constant_f16 n) + (mov_to_fpu (imm $I16 (ImmExtend.Zero) n) (ScalarSize.Size16))) + +;; Lower a constant f32. +;; +;; Note that we must make sure that all bits outside the lowest 32 are set to 0 +;; because this function is also used to load wider constants (that have zeros +;; in their most significant bits). +(decl constant_f32 (u32) Reg) +(rule 3 (constant_f32 0) + (vec_dup_imm (asimd_mov_mod_imm_zero (ScalarSize.Size32)) + false + (VectorSize.Size32x2))) +(rule 2 (constant_f32 n) + (if-let imm (asimd_fp_mod_imm_from_u64 n (ScalarSize.Size32))) + (fpu_move_fp_imm imm (ScalarSize.Size32))) +(rule 1 (constant_f32 (u32_as_u16 n)) + (if-let true (use_fp16)) + (constant_f16 n)) +(rule (constant_f32 n) + (mov_to_fpu (imm $I32 (ImmExtend.Zero) n) (ScalarSize.Size32))) + +;; Lower a constant f64. +;; +;; Note that we must make sure that all bits outside the lowest 64 are set to 0 +;; because this function is also used to load wider constants (that have zeros +;; in their most significant bits). +;; TODO: Treat as half of a 128 bit vector and consider replicated patterns. +;; Scalar MOVI might also be an option. +(decl constant_f64 (u64) Reg) +(rule 4 (constant_f64 0) + (vec_dup_imm (asimd_mov_mod_imm_zero (ScalarSize.Size32)) + false + (VectorSize.Size32x2))) +(rule 3 (constant_f64 n) + (if-let imm (asimd_fp_mod_imm_from_u64 n (ScalarSize.Size64))) + (fpu_move_fp_imm imm (ScalarSize.Size64))) +(rule 2 (constant_f64 (u64_as_u32 n)) + (constant_f32 n)) +(rule 1 (constant_f64 (u64_low32_bits_unset n)) + (mov_to_fpu (imm $I64 (ImmExtend.Zero) n) (ScalarSize.Size64))) +(rule (constant_f64 n) + (fpu_load64 (AMode.Const (emit_u64_le_const n)) (mem_flags_trusted))) + +;; Tests whether the low 32 bits in the input are all zero. +(decl u64_low32_bits_unset (u64) u64) +(extern extractor u64_low32_bits_unset u64_low32_bits_unset) + +;; Lower a constant f128. 
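+;; The rules below try, in priority order: an all-zeros immediate, reuse of
+;; the `constant_f64` path when the upper 64 bits are clear, a 64-bit splat
+;; when both halves are equal, and finally a load from a literal pool via
+;; `AMode.Const`.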
+(decl constant_f128 (u128) Reg) +(rule 3 (constant_f128 0) + (vec_dup_imm (asimd_mov_mod_imm_zero (ScalarSize.Size8)) + false + (VectorSize.Size8x16))) + +;; If the upper 64-bits are all zero then defer to `constant_f64`. +(rule 2 (constant_f128 (u128_as_u64 n)) (constant_f64 n)) + +;; If the low half of the u128 equals the high half then delegate to the splat +;; logic as a splat of a 64-bit value. +(rule 1 (constant_f128 (u128_replicated_u64 n)) + (splat_const n (VectorSize.Size64x2))) + +;; Base case is to load the constant from memory. +(rule (constant_f128 n) + (fpu_load128 (AMode.Const (emit_u128_le_const n)) (mem_flags_trusted))) + +;; Lower a vector splat with a constant parameter. +;; +;; The 64-bit input here only uses the low bits for the lane size in +;; `VectorSize` and all other bits are ignored. +(decl splat_const (u64 VectorSize) Reg) + +;; If the splat'd constant can itself be reduced in size then attempt to do so +;; as it will make it easier to create the immediates in the instructions below. +(rule 5 (splat_const (u64_replicated_u32 n) (VectorSize.Size64x2)) + (splat_const n (VectorSize.Size32x4))) +(rule 5 (splat_const (u32_replicated_u16 n) (VectorSize.Size32x4)) + (splat_const n (VectorSize.Size16x8))) +(rule 5 (splat_const (u32_replicated_u16 n) (VectorSize.Size32x2)) + (splat_const n (VectorSize.Size16x4))) +(rule 5 (splat_const (u16_replicated_u8 n) (VectorSize.Size16x8)) + (splat_const n (VectorSize.Size8x16))) +(rule 5 (splat_const (u16_replicated_u8 n) (VectorSize.Size16x4)) + (splat_const n (VectorSize.Size8x8))) + +;; Special cases for `vec_dup_imm` instructions where the input is either +;; negated or not. +(rule 4 (splat_const n size) + (if-let imm (asimd_mov_mod_imm_from_u64 n (vector_lane_size size))) + (vec_dup_imm imm false size)) +(rule 3 (splat_const n size) + (if-let imm (asimd_mov_mod_imm_from_u64 (u64_not n) (vector_lane_size size))) + (vec_dup_imm imm true size)) + +;; Special case a 32-bit splat where an immediate can be created by +;; concatenating the 32-bit constant into a 64-bit value +(rule 2 (splat_const n (VectorSize.Size32x4)) + (if-let imm (asimd_mov_mod_imm_from_u64 (u64_or n (u64_shl n 32)) (ScalarSize.Size64))) + (vec_dup_imm imm false (VectorSize.Size64x2))) +(rule 2 (splat_const n (VectorSize.Size32x2)) + (if-let imm (asimd_mov_mod_imm_from_u64 (u64_or n (u64_shl n 32)) (ScalarSize.Size64))) + (fpu_extend (vec_dup_imm imm false (VectorSize.Size64x2)) (ScalarSize.Size64))) + +(rule 1 (splat_const n size) + (if-let true (vec_dup_fp_imm_supports_lane_size (vector_lane_size size))) + (if-let imm (asimd_fp_mod_imm_from_u64 n (vector_lane_size size))) + (vec_dup_fp_imm imm size)) + +(decl pure vec_dup_fp_imm_supports_lane_size (ScalarSize) bool) +(rule 1 (vec_dup_fp_imm_supports_lane_size (ScalarSize.Size32)) true) +(rule 1 (vec_dup_fp_imm_supports_lane_size (ScalarSize.Size64)) true) +(rule (vec_dup_fp_imm_supports_lane_size _) false) + +;; The base case for splat is to use `vec_dup` with the immediate loaded into a +;; register. +(rule (splat_const n size) + (vec_dup (imm $I64 (ImmExtend.Zero) n) size)) + +;; Lower a FloatCC to a Cond. +(decl fp_cond_code (FloatCC) Cond) +;; TODO: Port lower_fp_condcode() to ISLE. +(extern constructor fp_cond_code fp_cond_code) + +;; Lower an integer cond code. +(spec (cond_code a) (provide (= a result))) +(decl cond_code (IntCC) Cond) +;; TODO: Port lower_condcode() to ISLE. +(extern constructor cond_code cond_code) + +;; Invert a condition code. 
+(decl invert_cond (Cond) Cond) +;; TODO: Port cond.invert() to ISLE. +(extern constructor invert_cond invert_cond) + +;; Generate comparison to zero operator from input condition code +(decl float_cc_cmp_zero_to_vec_misc_op (FloatCC) VecMisc2) +(extern constructor float_cc_cmp_zero_to_vec_misc_op float_cc_cmp_zero_to_vec_misc_op) + +(decl float_cc_cmp_zero_to_vec_misc_op_swap (FloatCC) VecMisc2) +(extern constructor float_cc_cmp_zero_to_vec_misc_op_swap float_cc_cmp_zero_to_vec_misc_op_swap) + +;; Match valid generic compare to zero cases +(decl fcmp_zero_cond (FloatCC) FloatCC) +(extern extractor fcmp_zero_cond fcmp_zero_cond) + +;; Match not equal compare to zero separately as it requires two output instructions +(decl fcmp_zero_cond_not_eq (FloatCC) FloatCC) +(extern extractor fcmp_zero_cond_not_eq fcmp_zero_cond_not_eq) + +;; Helper for generating float compare to zero instructions where 2nd argument is zero +(decl float_cmp_zero (FloatCC Reg VectorSize) Reg) +(rule (float_cmp_zero cond rn size) + (vec_misc (float_cc_cmp_zero_to_vec_misc_op cond) rn size)) + +;; Helper for generating float compare to zero instructions in case where 1st argument is zero +(decl float_cmp_zero_swap (FloatCC Reg VectorSize) Reg) +(rule (float_cmp_zero_swap cond rn size) + (vec_misc (float_cc_cmp_zero_to_vec_misc_op_swap cond) rn size)) + +;; Helper for generating float compare equal to zero instruction +(decl fcmeq0 (Reg VectorSize) Reg) +(rule (fcmeq0 rn size) + (vec_misc (VecMisc2.Fcmeq0) rn size)) + +;; Generate comparison to zero operator from input condition code +(decl int_cc_cmp_zero_to_vec_misc_op (IntCC) VecMisc2) +(extern constructor int_cc_cmp_zero_to_vec_misc_op int_cc_cmp_zero_to_vec_misc_op) + +(decl int_cc_cmp_zero_to_vec_misc_op_swap (IntCC) VecMisc2) +(extern constructor int_cc_cmp_zero_to_vec_misc_op_swap int_cc_cmp_zero_to_vec_misc_op_swap) + +;; Match valid generic compare to zero cases +(decl icmp_zero_cond (IntCC) IntCC) +(extern extractor icmp_zero_cond icmp_zero_cond) + +;; Match not equal compare to zero separately as it requires two output instructions +(decl icmp_zero_cond_not_eq (IntCC) IntCC) +(extern extractor icmp_zero_cond_not_eq icmp_zero_cond_not_eq) + +;; Helper for generating int compare to zero instructions where 2nd argument is zero +(decl int_cmp_zero (IntCC Reg VectorSize) Reg) +(rule (int_cmp_zero cond rn size) + (vec_misc (int_cc_cmp_zero_to_vec_misc_op cond) rn size)) + +;; Helper for generating int compare to zero instructions in case where 1st argument is zero +(decl int_cmp_zero_swap (IntCC Reg VectorSize) Reg) +(rule (int_cmp_zero_swap cond rn size) + (vec_misc (int_cc_cmp_zero_to_vec_misc_op_swap cond) rn size)) + +;; Helper for generating int compare equal to zero instruction +(decl cmeq0 (Reg VectorSize) Reg) +(rule (cmeq0 rn size) + (vec_misc (VecMisc2.Cmeq0) rn size)) + +;; Helper for emitting `MInst.AtomicRMW` instructions. +(decl lse_atomic_rmw (AtomicRMWOp Value Reg Type MemFlags) Reg) +(rule (lse_atomic_rmw op p r_arg2 ty flags) + (let ( + (r_addr Reg p) + (dst WritableReg (temp_writable_reg ty)) + (_ Unit (emit (MInst.AtomicRMW op r_arg2 dst r_addr ty flags))) + ) + dst)) + +;; Helper for emitting `MInst.AtomicCAS` instructions. +(decl lse_atomic_cas (Reg Reg Reg Type MemFlags) Reg) +(rule (lse_atomic_cas addr expect replace ty flags) + (let ( + (dst WritableReg (temp_writable_reg ty)) + (_ Unit (emit (MInst.AtomicCAS dst expect replace addr ty flags))) + ) + dst)) + +;; Helper for emitting `MInst.AtomicRMWLoop` instructions. 
+;; - Make sure that both args are in virtual regs, since in effect +;; we have to do a parallel copy to get them safely to the AtomicRMW input +;; regs, and that's not guaranteed safe if either is in a real reg. +;; - Move the args to the preordained AtomicRMW input regs +;; - And finally, copy the preordained AtomicRMW output reg to its destination. +(decl atomic_rmw_loop (AtomicRMWLoopOp Reg Reg Type MemFlags) Reg) +(rule (atomic_rmw_loop op addr operand ty flags) + (let ((dst WritableReg (temp_writable_reg $I64)) + (scratch1 WritableReg (temp_writable_reg $I64)) + (scratch2 WritableReg (temp_writable_reg $I64)) + (_ Unit (emit (MInst.AtomicRMWLoop ty op flags addr operand dst scratch1 scratch2)))) + dst)) + +;; Helper for emitting `MInst.AtomicCASLoop` instructions. +;; This is very similar to, but not identical to, the AtomicRmw case. Note +;; that the AtomicCASLoop sequence does its own masking, so we don't need to worry +;; about zero-extending narrow (I8/I16/I32) values here. +;; Make sure that all three args are in virtual regs. See corresponding comment +;; for `atomic_rmw_loop` above. +(decl atomic_cas_loop (Reg Reg Reg Type MemFlags) Reg) +(rule (atomic_cas_loop addr expect replace ty flags) + (let ((dst WritableReg (temp_writable_reg $I64)) + (scratch WritableReg (temp_writable_reg $I64)) + (_ Unit (emit (MInst.AtomicCASLoop ty flags addr expect replace dst scratch)))) + dst)) + +;; Helper for emitting `MInst.MovPReg` instructions. +(decl mov_from_preg (PReg) Reg) +(rule (mov_from_preg src) + (let ((dst WritableReg (temp_writable_reg $I64)) + (_ Unit (emit (MInst.MovFromPReg dst src)))) + dst)) + +(decl mov_to_preg (PReg Reg) SideEffectNoResult) +(rule (mov_to_preg dst src) + (SideEffectNoResult.Inst (MInst.MovToPReg dst src))) + +(decl preg_sp () PReg) +(extern constructor preg_sp preg_sp) + +(decl preg_fp () PReg) +(extern constructor preg_fp preg_fp) + +(decl preg_link () PReg) +(extern constructor preg_link preg_link) + +(decl preg_pinned () PReg) +(extern constructor preg_pinned preg_pinned) + +(decl aarch64_sp () Reg) +(rule (aarch64_sp) + (mov_from_preg (preg_sp))) + +(decl aarch64_fp () Reg) +(rule (aarch64_fp) + (mov_from_preg (preg_fp))) + +(decl aarch64_link () Reg) +(rule 1 (aarch64_link) + (if (preserve_frame_pointers)) + (if (sign_return_address_disabled)) + (let ((dst WritableReg (temp_writable_reg $I64)) + ;; Even though LR is not an allocatable register, whether it + ;; contains the return address for the current function is + ;; unknown at this point. For example, this operation may come + ;; immediately after a call, in which case LR would not have a + ;; valid value. That's why we must obtain the return address from + ;; the frame record that corresponds to the current subroutine on + ;; the stack; the presence of the record is guaranteed by the + ;; `preserve_frame_pointers` setting. + (addr AMode (AMode.FPOffset 8)) + (_ Unit (emit (MInst.ULoad64 dst addr (mem_flags_trusted))))) + dst)) + +(rule (aarch64_link) + (if (preserve_frame_pointers)) + ;; Similarly to the rule above, we must load the return address from the + ;; the frame record. Furthermore, we can use LR as a scratch register + ;; because the function will set it to the return address immediately + ;; before returning. + (let ((addr AMode (AMode.FPOffset 8)) + (lr WritableReg (writable_link_reg)) + (_ Unit (emit (MInst.ULoad64 lr addr (mem_flags_trusted)))) + (_ Unit (emit (MInst.Xpaclri)))) + (mov_from_preg (preg_link)))) + +;; Helper for getting the maximum shift amount for a type. 
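+;; That is, the lane width in bits minus one (63 for `$F64`, 31 for `$F32`);
+;; `fcopy_sign` below uses this amount to shift the sign bit into and out of
+;; position.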
+ +(decl max_shift (Type) u8) +(rule (max_shift $F64) 63) +(rule (max_shift $F32) 31) + +;; Helper for generating `fcopysign` instruction sequences. + +(decl fcopy_sign (Reg Reg Type) Reg) +(rule 1 (fcopy_sign x y (ty_scalar_float ty)) + (let ((dst WritableReg (temp_writable_reg $F64)) + (tmp Reg (fpu_rri (fpu_op_ri_ushr (ty_bits ty) (max_shift ty)) y)) + (_ Unit (emit (MInst.FpuRRIMod (fpu_op_ri_sli (ty_bits ty) (max_shift ty)) dst x tmp)))) + dst)) +(rule (fcopy_sign x y ty @ (multi_lane _ _)) + (let ((dst WritableReg (temp_writable_reg $I8X16)) + (tmp Reg (ushr_vec_imm y (max_shift (lane_type ty)) (vector_size ty))) + (_ Unit (emit (MInst.VecShiftImmMod (VecShiftImmModOp.Sli) dst x tmp (vector_size ty) (max_shift (lane_type ty)))))) + dst)) + +;; Helpers for generating `MInst.FpuToInt` instructions. + +(decl fpu_to_int_nan_check (ScalarSize Reg) Reg) +(rule (fpu_to_int_nan_check size src) + (let ((r ValueRegs + (with_flags (fpu_cmp size src src) + (ConsumesFlags.ConsumesFlagsReturnsReg + (MInst.TrapIf (cond_br_cond (Cond.Vs)) + (trap_code_bad_conversion_to_integer)) + src)))) + (value_regs_get r 0))) + +;; Checks that the value is not less than the minimum bound, +;; accepting a boolean (whether the type is signed), input type, +;; output type, and registers containing the source and minimum bound. +(decl fpu_to_int_underflow_check (bool Type Type Reg Reg) Reg) +(rule (fpu_to_int_underflow_check true $F32 (fits_in_16 out_ty) src min) + (let ((r ValueRegs + (with_flags (fpu_cmp (ScalarSize.Size32) src min) + (ConsumesFlags.ConsumesFlagsReturnsReg + (MInst.TrapIf (cond_br_cond (Cond.Le)) + (trap_code_integer_overflow)) + src)))) + (value_regs_get r 0))) +(rule (fpu_to_int_underflow_check true $F64 (fits_in_32 out_ty) src min) + (let ((r ValueRegs + (with_flags (fpu_cmp (ScalarSize.Size64) src min) + (ConsumesFlags.ConsumesFlagsReturnsReg + (MInst.TrapIf (cond_br_cond (Cond.Le)) + (trap_code_integer_overflow)) + src)))) + (value_regs_get r 0))) +(rule -1 (fpu_to_int_underflow_check true in_ty _out_ty src min) + (let ((r ValueRegs + (with_flags (fpu_cmp (scalar_size in_ty) src min) + (ConsumesFlags.ConsumesFlagsReturnsReg + (MInst.TrapIf (cond_br_cond (Cond.Lt)) + (trap_code_integer_overflow)) + src)))) + (value_regs_get r 0))) +(rule (fpu_to_int_underflow_check false in_ty _out_ty src min) + (let ((r ValueRegs + (with_flags (fpu_cmp (scalar_size in_ty) src min) + (ConsumesFlags.ConsumesFlagsReturnsReg + (MInst.TrapIf (cond_br_cond (Cond.Le)) + (trap_code_integer_overflow)) + src)))) + (value_regs_get r 0))) + +(decl fpu_to_int_overflow_check (ScalarSize Reg Reg) Reg) +(rule (fpu_to_int_overflow_check size src max) + (let ((r ValueRegs + (with_flags (fpu_cmp size src max) + (ConsumesFlags.ConsumesFlagsReturnsReg + (MInst.TrapIf (cond_br_cond (Cond.Ge)) + (trap_code_integer_overflow)) + src)))) + (value_regs_get r 0))) + +;; Emits the appropriate instruction sequence to convert a +;; floating-point value to an integer, trapping if the value +;; is a NaN or does not fit in the target type. +;; Accepts the specific conversion op, the source register, +;; whether the input is signed, and finally the input and output +;; types. 
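+;; The sequence emitted below first traps on NaN (`fpu_to_int_nan_check`),
+;; then traps if the value is below the representable minimum
+;; (`fpu_to_int_underflow_check`) or above the representable maximum
+;; (`fpu_to_int_overflow_check`), and only then performs the conversion
+;; itself with `fpu_to_int`.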
+(decl fpu_to_int_cvt (FpuToIntOp Reg bool Type Type) Reg) +(rule (fpu_to_int_cvt op src signed in_ty out_ty) + (let ((size ScalarSize (scalar_size in_ty)) + (in_bits u8 (ty_bits in_ty)) + (out_bits u8 (ty_bits out_ty)) + (src Reg (fpu_to_int_nan_check size src)) + (min Reg (min_fp_value signed in_bits out_bits)) + (src Reg (fpu_to_int_underflow_check signed in_ty out_ty src min)) + (max Reg (max_fp_value signed in_bits out_bits)) + (src Reg (fpu_to_int_overflow_check size src max))) + (fpu_to_int op src))) + +;; Emits the appropriate instruction sequence to convert a +;; floating-point value to an integer, saturating if the value +;; does not fit in the target type. +;; Accepts the specific conversion op, the source register, +;; whether the input is signed, and finally the output type. +(decl fpu_to_int_cvt_sat (FpuToIntOp Reg bool Type) Reg) +(rule 1 (fpu_to_int_cvt_sat op src _ $I64) + (fpu_to_int op src)) +(rule 1 (fpu_to_int_cvt_sat op src _ $I32) + (fpu_to_int op src)) +(rule (fpu_to_int_cvt_sat op src false (fits_in_16 out_ty)) + (let ((result Reg (fpu_to_int op src)) + (max Reg (imm out_ty (ImmExtend.Zero) (ty_mask out_ty)))) + (with_flags_reg + (cmp (OperandSize.Size32) result max) + (csel (Cond.Hi) max result)))) +(rule (fpu_to_int_cvt_sat op src true (fits_in_16 out_ty)) + (let ((result Reg (fpu_to_int op src)) + (max Reg (signed_max out_ty)) + (min Reg (signed_min out_ty)) + (result Reg (with_flags_reg + (cmp (operand_size out_ty) result max) + (csel (Cond.Gt) max result))) + (result Reg (with_flags_reg + (cmp (operand_size out_ty) result min) + (csel (Cond.Lt) min result)))) + result)) + +(decl signed_min (Type) Reg) +(rule (signed_min $I8) (imm $I8 (ImmExtend.Sign) 0x80)) +(rule (signed_min $I16) (imm $I16 (ImmExtend.Sign) 0x8000)) + +(decl signed_max (Type) Reg) +(rule (signed_max $I8) (imm $I8 (ImmExtend.Sign) 0x7F)) +(rule (signed_max $I16) (imm $I16 (ImmExtend.Sign) 0x7FFF)) + +(decl fpu_to_int (FpuToIntOp Reg) Reg) +(rule (fpu_to_int op src) + (let ((dst WritableReg (temp_writable_reg $I64)) + (_ Unit (emit (MInst.FpuToInt op dst src)))) + dst)) + +;; Helper for generating `MInst.IntToFpu` instructions. + +(decl int_to_fpu (IntToFpuOp Reg) Reg) +(rule (int_to_fpu op src) + (let ((dst WritableReg (temp_writable_reg $I8X16)) + (_ Unit (emit (MInst.IntToFpu op dst src)))) + dst)) + +;;;; Helpers for Emitting Calls ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(decl gen_call (SigRef ExternalName RelocDistance ValueSlice) InstOutput) +(extern constructor gen_call gen_call) + +(decl gen_call_indirect (SigRef Value ValueSlice) InstOutput) +(extern constructor gen_call_indirect gen_call_indirect) + +;; Helpers for pinned register manipulation. + +(decl write_pinned_reg (Reg) SideEffectNoResult) +(rule (write_pinned_reg val) + (mov_to_preg (preg_pinned) val)) + +;; Helpers for stackslot effective address generation. + +(decl compute_stack_addr (StackSlot Offset32) Reg) +(rule (compute_stack_addr stack_slot offset) + (let ((dst WritableReg (temp_writable_reg $I64)) + (_ Unit (emit (abi_stackslot_addr dst stack_slot offset)))) + dst)) + +;; Helper for emitting instruction sequences to perform a vector comparison. 
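+;; `vec_cmp_vc` below detects "ordered" lanes by AND-ing the results of
+;; comparing each operand with itself (`Fcmeq x, x` is all-ones exactly when
+;; the lane is not NaN); the unordered case (`Cond.Vs`) is its bitwise
+;; negation.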
+ +(decl vec_cmp_vc (Reg Reg VectorSize) Reg) +(rule (vec_cmp_vc rn rm size) + (let ((dst Reg (vec_rrr (VecALUOp.Fcmeq) rn rn size)) + (tmp Reg (vec_rrr (VecALUOp.Fcmeq) rm rm size)) + (dst Reg (vec_rrr (VecALUOp.And) dst tmp size))) + dst)) + +(decl vec_cmp (Reg Reg Type Cond) Reg) + +;; Floating point Vs / Vc +(rule (vec_cmp rn rm ty (Cond.Vc)) + (if (ty_vector_float ty)) + (vec_cmp_vc rn rm (vector_size ty))) +(rule (vec_cmp rn rm ty (Cond.Vs)) + (if (ty_vector_float ty)) + (let ((tmp Reg (vec_cmp_vc rn rm (vector_size ty)))) + (vec_misc (VecMisc2.Not) tmp (vector_size ty)))) + +;; 'Less than' operations are implemented by swapping the order of +;; operands and using the 'greater than' instructions. +;; 'Not equal' is implemented with 'equal' and inverting the result. + +;; Floating-point +(rule (vec_cmp rn rm ty (Cond.Eq)) + (if (ty_vector_float ty)) + (vec_rrr (VecALUOp.Fcmeq) rn rm (vector_size ty))) +(rule (vec_cmp rn rm ty (Cond.Ne)) + (if (ty_vector_float ty)) + (let ((tmp Reg (vec_rrr (VecALUOp.Fcmeq) rn rm (vector_size ty)))) + (vec_misc (VecMisc2.Not) tmp (vector_size ty)))) +(rule (vec_cmp rn rm ty (Cond.Ge)) + (if (ty_vector_float ty)) + (vec_rrr (VecALUOp.Fcmge) rn rm (vector_size ty))) +(rule (vec_cmp rn rm ty (Cond.Gt)) + (if (ty_vector_float ty)) + (vec_rrr (VecALUOp.Fcmgt) rn rm (vector_size ty))) +;; Floating-point swapped-operands +(rule (vec_cmp rn rm ty (Cond.Mi)) + (if (ty_vector_float ty)) + (vec_rrr (VecALUOp.Fcmgt) rm rn (vector_size ty))) +(rule (vec_cmp rn rm ty (Cond.Ls)) + (if (ty_vector_float ty)) + (vec_rrr (VecALUOp.Fcmge) rm rn (vector_size ty))) + +;; Integer +(rule 1 (vec_cmp rn rm ty (Cond.Eq)) + (if (ty_vector_not_float ty)) + (vec_rrr (VecALUOp.Cmeq) rn rm (vector_size ty))) +(rule 1 (vec_cmp rn rm ty (Cond.Ne)) + (if (ty_vector_not_float ty)) + (let ((tmp Reg (vec_rrr (VecALUOp.Cmeq) rn rm (vector_size ty)))) + (vec_misc (VecMisc2.Not) tmp (vector_size ty)))) +(rule 1 (vec_cmp rn rm ty (Cond.Ge)) + (if (ty_vector_not_float ty)) + (vec_rrr (VecALUOp.Cmge) rn rm (vector_size ty))) +(rule 1 (vec_cmp rn rm ty (Cond.Gt)) + (if (ty_vector_not_float ty)) + (vec_rrr (VecALUOp.Cmgt) rn rm (vector_size ty))) +(rule (vec_cmp rn rm ty (Cond.Hs)) + (if (ty_vector_not_float ty)) + (vec_rrr (VecALUOp.Cmhs) rn rm (vector_size ty))) +(rule (vec_cmp rn rm ty (Cond.Hi)) + (if (ty_vector_not_float ty)) + (vec_rrr (VecALUOp.Cmhi) rn rm (vector_size ty))) +;; Integer swapped-operands +(rule (vec_cmp rn rm ty (Cond.Le)) + (if (ty_vector_not_float ty)) + (vec_rrr (VecALUOp.Cmge) rm rn (vector_size ty))) +(rule (vec_cmp rn rm ty (Cond.Lt)) + (if (ty_vector_not_float ty)) + (vec_rrr (VecALUOp.Cmgt) rm rn (vector_size ty))) +(rule 1 (vec_cmp rn rm ty (Cond.Ls)) + (if (ty_vector_not_float ty)) + (vec_rrr (VecALUOp.Cmhs) rm rn (vector_size ty))) +(rule (vec_cmp rn rm ty (Cond.Lo)) + (if (ty_vector_not_float ty)) + (vec_rrr (VecALUOp.Cmhi) rm rn (vector_size ty))) + +;; Helper for determining if any value in a vector is true. +;; This operation is implemented by using umaxp to create a scalar value, which +;; is then compared against zero. 
+;; +;; umaxp vn.4s, vm.4s, vm.4s +;; mov xm, vn.d[0] +;; cmp xm, #0 +(decl vanytrue (Reg Type) ProducesFlags) +(rule 1 (vanytrue src (ty_vec128 ty)) + (let ((src Reg (vec_rrr (VecALUOp.Umaxp) src src (VectorSize.Size32x4))) + (src Reg (mov_from_vec src 0 (ScalarSize.Size64)))) + (cmp_imm (OperandSize.Size64) src (u8_into_imm12 0)))) +(rule (vanytrue src ty) + (if (ty_vec64 ty)) + (let ((src Reg (mov_from_vec src 0 (ScalarSize.Size64)))) + (cmp_imm (OperandSize.Size64) src (u8_into_imm12 0)))) + +;;;; TLS Values ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Helper for emitting ElfTlsGetAddr. +(decl elf_tls_get_addr (ExternalName) Reg) +(rule (elf_tls_get_addr name) + (let ((dst WritableReg (temp_writable_reg $I64)) + (tmp WritableReg (temp_writable_reg $I64)) + (_ Unit (emit (MInst.ElfTlsGetAddr (box_external_name name) dst tmp)))) + dst)) + +(decl macho_tls_get_addr (ExternalName) Reg) +(rule (macho_tls_get_addr name) + (let ((dst WritableReg (temp_writable_reg $I64)) + (_ Unit (emit (MInst.MachOTlsGetAddr name dst)))) + dst)) + +;; A tuple of `ProducesFlags` and `IntCC`. +(type FlagsAndCC (enum (FlagsAndCC (flags ProducesFlags) + (cc IntCC)))) + +(spec (flags_and_cc flags cc) + (provide + (= result (concat (extract 67 64 flags) cc))) + (require + (or + (= cc (IntCC.Equal)) + (= cc (IntCC.NotEqual)) + (= cc (IntCC.UnsignedGreaterThanOrEqual)) + (= cc (IntCC.UnsignedGreaterThan)) + (= cc (IntCC.UnsignedLessThanOrEqual)) + (= cc (IntCC.UnsignedLessThan)) + (= cc (IntCC.SignedGreaterThanOrEqual)) + (= cc (IntCC.SignedGreaterThan)) + (= cc (IntCC.SignedLessThanOrEqual)) + (= cc (IntCC.SignedLessThan))))) +;; Helper constructor for `FlagsAndCC`. +(decl flags_and_cc (ProducesFlags IntCC) FlagsAndCC) +(rule (flags_and_cc flags cc) (FlagsAndCC.FlagsAndCC flags cc)) + +(spec (flags_and_cc_to_bool a) + (provide + (= result + (switch (extract 7 0 a) + ((IntCC.Equal) (if (= (extract 10 10 a) #b1) #x01 #x00)) + ((IntCC.NotEqual) (if (= (extract 10 10 a) #b0) #x01 #x00)) + ((IntCC.SignedGreaterThan) (if (and (= (extract 10 10 a) #b0) (= (extract 11 11 a) (extract 8 8 a))) #x01 #x00)) + ((IntCC.SignedGreaterThanOrEqual) (if (= (extract 11 11 a) (extract 8 8 a)) #x01 #x00)) + ((IntCC.SignedLessThan) (if (not (= (extract 11 11 a) (extract 8 8 a))) #x01 #x00)) + ((IntCC.SignedLessThanOrEqual) (if (or (= (extract 10 10 a) #b1) (not (= (extract 11 11 a) (extract 8 8 a)))) #x01 #x00)) + ((IntCC.UnsignedGreaterThan) (if (and (= (extract 9 9 a) #b1) (= (extract 10 10 a) #b0)) #x01 #x00)) + ((IntCC.UnsignedGreaterThanOrEqual) (if (= (extract 9 9 a) #b1) #x01 #x00)) + ((IntCC.UnsignedLessThan) (if (= (extract 9 9 a) #b0) #x01 #x00)) + ((IntCC.UnsignedLessThanOrEqual) (if (or (= (extract 9 9 a) #b0) (= (extract 10 10 a) #b1)) #x01 #x00))))) + (require + (or + (= (extract 7 0 a) (IntCC.Equal)) + (= (extract 7 0 a) (IntCC.NotEqual)) + (= (extract 7 0 a) (IntCC.UnsignedGreaterThanOrEqual)) + (= (extract 7 0 a) (IntCC.UnsignedGreaterThan)) + (= (extract 7 0 a) (IntCC.UnsignedLessThanOrEqual)) + (= (extract 7 0 a) (IntCC.UnsignedLessThan)) + (= (extract 7 0 a) (IntCC.SignedGreaterThanOrEqual)) + (= (extract 7 0 a) (IntCC.SignedGreaterThan)) + (= (extract 7 0 a) (IntCC.SignedLessThanOrEqual)) + (= (extract 7 0 a) (IntCC.SignedLessThan))))) +;; Materialize a `FlagsAndCC` into a boolean `ValueRegs`. 
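+;; (In the specs above, the 12-bit `FlagsAndCC` value is modeled as the NZCV
+;; flags in bits 11..8 -- N = bit 11, Z = bit 10, C = bit 9, V = bit 8 --
+;; concatenated with the 8-bit `IntCC` code in bits 7..0.)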
+(decl flags_and_cc_to_bool (FlagsAndCC) ValueRegs) +(rule (flags_and_cc_to_bool (FlagsAndCC.FlagsAndCC flags cc)) + (with_flags flags (materialize_bool_result (cond_code cc)))) + +;; Get the `ProducesFlags` out of a `FlagsAndCC`. +(decl flags_and_cc_flags (FlagsAndCC) ProducesFlags) +(rule (flags_and_cc_flags (FlagsAndCC.FlagsAndCC flags _cc)) flags) + +;; Get the `IntCC` out of a `FlagsAndCC`. +(decl flags_and_cc_cc (FlagsAndCC) IntCC) +(rule (flags_and_cc_cc (FlagsAndCC.FlagsAndCC _flags cc)) cc) + +;; Helpers for lowering `icmp` sequences. +;; `lower_icmp` contains shared functionality for lowering `icmp` +;; sequences, which `lower_icmp_into_{reg,flags}` extend from. +(spec (lower_icmp c x y in_ty) + (provide + (= result + (concat + (extract 67 64 + (if (or (= c (IntCC.SignedGreaterThanOrEqual)) + (= c (IntCC.SignedGreaterThan)) + (= c (IntCC.SignedLessThanOrEqual)) + (= c (IntCC.SignedLessThan))) + (if (<= in_ty 32) + (subs 32 (sign_ext 64 x) (sign_ext 64 y)) + (subs 64 (sign_ext 64 x) (sign_ext 64 y))) + (if (<= in_ty 32) + (subs 32 (zero_ext 64 x) (zero_ext 64 y)) + (subs 64 (zero_ext 64 x) (zero_ext 64 y))))) + c))) + (require + (or + (= c (IntCC.Equal)) + (= c (IntCC.NotEqual)) + (= c (IntCC.UnsignedGreaterThanOrEqual)) + (= c (IntCC.UnsignedGreaterThan)) + (= c (IntCC.UnsignedLessThanOrEqual)) + (= c (IntCC.UnsignedLessThan)) + (= c (IntCC.SignedGreaterThanOrEqual)) + (= c (IntCC.SignedGreaterThan)) + (= c (IntCC.SignedLessThanOrEqual)) + (= c (IntCC.SignedLessThan))) + (or (= in_ty 8) + (= in_ty 16) + (= in_ty 32) + (= in_ty 64)) + (= in_ty (widthof x)) + (= in_ty (widthof y)))) +(instantiate lower_icmp + ((args (bv 8) (bv 8) (bv 8) Int) (ret (bv 12)) (canon (bv 8))) + ((args (bv 8) (bv 16) (bv 16) Int) (ret (bv 12)) (canon (bv 16))) + ((args (bv 8) (bv 32) (bv 32) Int) (ret (bv 12)) (canon (bv 32))) + ((args (bv 8) (bv 64) (bv 64) Int) (ret (bv 12)) (canon (bv 64))) +) +(decl lower_icmp (IntCC Value Value Type) FlagsAndCC) + +(spec (lower_icmp_into_reg c x y in_ty out_ty) + (provide + (= result + (switch c + ((IntCC.Equal) (if (= x y) #x01 #x00)) + ((IntCC.NotEqual) (if (not (= x y)) #x01 #x00)) + ((IntCC.SignedGreaterThan) (if (bvsgt x y) #x01 #x00)) + ((IntCC.SignedGreaterThanOrEqual) (if (bvsge x y) #x01 #x00)) + ((IntCC.SignedLessThan) (if (bvslt x y) #x01 #x00)) + ((IntCC.SignedLessThanOrEqual) (if (bvsle x y) #x01 #x00)) + ((IntCC.UnsignedGreaterThan) (if (bvugt x y) #x01 #x00)) + ((IntCC.UnsignedGreaterThanOrEqual) (if (bvuge x y) #x01 #x00)) + ((IntCC.UnsignedLessThan) (if (bvult x y) #x01 #x00)) + ((IntCC.UnsignedLessThanOrEqual) (if (bvule x y) #x01 #x00))))) + (require + (or + (= c (IntCC.Equal)) + (= c (IntCC.NotEqual)) + (= c (IntCC.UnsignedGreaterThanOrEqual)) + (= c (IntCC.UnsignedGreaterThan)) + (= c (IntCC.UnsignedLessThanOrEqual)) + (= c (IntCC.UnsignedLessThan)) + (= c (IntCC.SignedGreaterThanOrEqual)) + (= c (IntCC.SignedGreaterThan)) + (= c (IntCC.SignedLessThanOrEqual)) + (= c (IntCC.SignedLessThan))) + (or (= in_ty 8) + (= in_ty 16) + (= in_ty 32) + (= in_ty 64)) + (= in_ty (widthof x)) + (= in_ty (widthof y)) + (= out_ty 8))) +(instantiate lower_icmp_into_reg + ((args (bv 8) (bv 8) (bv 8) Int Int) (ret (bv 8)) (canon (bv 8))) + ((args (bv 8) (bv 16) (bv 16) Int Int) (ret (bv 8)) (canon (bv 16))) + ((args (bv 8) (bv 32) (bv 32) Int Int) (ret (bv 8)) (canon (bv 32))) + ((args (bv 8) (bv 64) (bv 64) Int Int) (ret (bv 8)) (canon (bv 64))) +) +(decl lower_icmp_into_reg (IntCC Value Value Type Type) ValueRegs) +(decl lower_icmp_into_flags (IntCC Value Value 
Type) FlagsAndCC) + +(spec (lower_icmp_const c x y in_ty) + (provide + (= result + (concat (extract 67 64 + (if (or (= c (IntCC.SignedGreaterThanOrEqual)) + (= c (IntCC.SignedGreaterThan)) + (= c (IntCC.SignedLessThanOrEqual)) + (= c (IntCC.SignedLessThan))) + (if (<= in_ty 32) + (subs 32 (sign_ext 64 x) y) + (subs 64 (sign_ext 64 x) y)) + (if (<= in_ty 32) + (subs 32 (zero_ext 64 x) y) + (subs 64 (zero_ext 64 x) y)))) + c))) + (require + (or + (= c (IntCC.Equal)) + (= c (IntCC.NotEqual)) + (= c (IntCC.UnsignedGreaterThanOrEqual)) + (= c (IntCC.UnsignedGreaterThan)) + (= c (IntCC.UnsignedLessThanOrEqual)) + (= c (IntCC.UnsignedLessThan)) + (= c (IntCC.SignedGreaterThanOrEqual)) + (= c (IntCC.SignedGreaterThan)) + (= c (IntCC.SignedLessThanOrEqual)) + (= c (IntCC.SignedLessThan))) + (or (= in_ty 32) (= in_ty 64)) + (= in_ty (widthof x)))) +(instantiate lower_icmp_const + ((args (bv 8) (bv 8) (bv 64) Int) (ret (bv 12)) (canon (bv 8))) + ((args (bv 8) (bv 16) (bv 64) Int) (ret (bv 12)) (canon (bv 16))) + ((args (bv 8) (bv 32) (bv 64) Int) (ret (bv 12)) (canon (bv 32))) + ((args (bv 8) (bv 64) (bv 64) Int) (ret (bv 12)) (canon (bv 64))) +) +(decl lower_icmp_const (IntCC Value u64 Type) FlagsAndCC) +;; For most cases, `lower_icmp_into_flags` is the same as `lower_icmp`, +;; except for some I128 cases (see below). +(rule -1 (lower_icmp_into_flags cond x y ty) (lower_icmp cond x y ty)) + +;; Vectors. +;; `icmp` into flags for vectors is invalid. +(rule 1 (lower_icmp_into_reg cond x y in_ty @ (multi_lane _ _) _out_ty) + (let ((cond Cond (cond_code cond)) + (rn Reg (put_in_reg x)) + (rm Reg (put_in_reg y))) + (vec_cmp rn rm in_ty cond))) + +;; Determines the appropriate extend op given the value type and the given ArgumentExtension. +(spec (lower_extend_op ty b) + (provide + (= result + (switch ty + (8 (switch b ((ArgumentExtension.Sext) (ExtendOp.SXTB)) + ((ArgumentExtension.Uext) (ExtendOp.UXTB)))) + (16 (switch b ((ArgumentExtension.Sext) (ExtendOp.SXTH)) + ((ArgumentExtension.Uext) (ExtendOp.UXTH))))))) + (require (or (= ty 8) (= ty 16) (= ty 32) (= ty 64)))) +(decl lower_extend_op (Type ArgumentExtension) ExtendOp) +(rule (lower_extend_op $I8 (ArgumentExtension.Sext)) (ExtendOp.SXTB)) +(rule (lower_extend_op $I16 (ArgumentExtension.Sext)) (ExtendOp.SXTH)) +(rule (lower_extend_op $I8 (ArgumentExtension.Uext)) (ExtendOp.UXTB)) +(rule (lower_extend_op $I16 (ArgumentExtension.Uext)) (ExtendOp.UXTH)) + +;; Integers <= 64-bits. 
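+;;
+;; Informally: 8- and 16-bit comparisons widen the left-hand operand into a
+;; 32-bit register (sign-extending for signed conditions, zero-extending
+;; otherwise) and compare against either an immediate or the right-hand
+;; register with a matching SXTB/SXTH/UXTB/UXTH extend, while 32- and
+;; 64-bit comparisons use `cmp`/`cmp`-immediate directly.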
+(rule lower_icmp_into_reg_8_16_32_64 -2 (lower_icmp_into_reg cond rn rm in_ty out_ty) + (if (ty_int_ref_scalar_64 in_ty)) + (let ((cc Cond (cond_code cond))) + (flags_and_cc_to_bool (lower_icmp cond rn rm in_ty)))) + +(rule lower_icmp_8_16_signed 1 (lower_icmp cond rn rm (fits_in_16 ty)) + (if (signed_cond_code cond)) + (let ((rn Reg (put_in_reg_sext32 rn))) + (flags_and_cc (cmp_extend (operand_size ty) rn rm (lower_extend_op ty (ArgumentExtension.Sext))) cond))) +(rule lower_icmp_8_16_unsigned_imm -1 (lower_icmp cond rn (imm12_from_value rm) (fits_in_16 ty)) + (let ((rn Reg (put_in_reg_zext32 rn))) + (flags_and_cc (cmp_imm (operand_size ty) rn rm) cond))) +(rule lower_icmp_8_16_unsigned -2 (lower_icmp cond rn rm (fits_in_16 ty)) + (let ((rn Reg (put_in_reg_zext32 rn))) + (flags_and_cc (cmp_extend (operand_size ty) rn rm (lower_extend_op ty (ArgumentExtension.Uext))) cond))) +(rule lower_icmp_32_64_const -3 (lower_icmp cond rn (u64_from_iconst c) ty) + (if (ty_int_ref_scalar_64 ty)) + (lower_icmp_const cond rn c ty)) +(rule lower_icmp_32_64 -4 (lower_icmp cond rn rm ty) + (if (ty_int_ref_scalar_64 ty)) + (flags_and_cc (cmp (operand_size ty) rn rm) cond)) + +;; We get better encodings when testing against an immediate that's even instead +;; of odd, so rewrite comparisons to use even immediates: +;; +;; A >= B + 1 +;; ==> A - 1 >= B +;; ==> A > B +(rule lower_icmp_const_32_64_ugte (lower_icmp_const (IntCC.UnsignedGreaterThanOrEqual) a b ty) + (if (ty_int_ref_scalar_64 ty)) + (if-let true (u64_is_odd b)) + (if-let (imm12_from_u64 imm) (u64_sub b 1)) + (flags_and_cc (cmp_imm (operand_size ty) a imm) (IntCC.UnsignedGreaterThan))) + +(rule lower_icmp_const_32_64_sgte (lower_icmp_const (IntCC.SignedGreaterThanOrEqual) a b ty) + (if (ty_int_ref_scalar_64 ty)) + (if-let true (u64_is_odd b)) + (if-let (imm12_from_u64 imm) (u64_sub b 1)) + (flags_and_cc (cmp_imm (operand_size ty) a imm) (IntCC.SignedGreaterThan))) + +(rule lower_icmp_const_32_64_imm -1 (lower_icmp_const cond rn (imm12_from_u64 c) ty) + (if (ty_int_ref_scalar_64 ty)) + (flags_and_cc (cmp_imm (operand_size ty) rn c) cond)) +(rule lower_icmp_const_32_64 -2 (lower_icmp_const cond rn c ty) + (if (ty_int_ref_scalar_64 ty)) + (flags_and_cc (cmp (operand_size ty) rn (imm ty (ImmExtend.Zero) c)) cond)) + + +;; 128-bit integers. 
+(rule (lower_icmp_into_reg cond @ (IntCC.Equal) rn rm $I128 $I8) + (let ((cc Cond (cond_code cond))) + (flags_and_cc_to_bool + (lower_icmp cond rn rm $I128)))) +(rule (lower_icmp_into_reg cond @ (IntCC.NotEqual) rn rm $I128 $I8) + (let ((cc Cond (cond_code cond))) + (flags_and_cc_to_bool + (lower_icmp cond rn rm $I128)))) + +;; cmp lhs_lo, rhs_lo +;; ccmp lhs_hi, rhs_hi, #0, eq +(decl lower_icmp_i128_eq_ne (Value Value) ProducesFlags) +(rule (lower_icmp_i128_eq_ne lhs rhs) + (let ((lhs ValueRegs (put_in_regs lhs)) + (rhs ValueRegs (put_in_regs rhs)) + (lhs_lo Reg (value_regs_get lhs 0)) + (lhs_hi Reg (value_regs_get lhs 1)) + (rhs_lo Reg (value_regs_get rhs 0)) + (rhs_hi Reg (value_regs_get rhs 1)) + (cmp_inst ProducesFlags (cmp (OperandSize.Size64) lhs_lo rhs_lo))) + (ccmp (OperandSize.Size64) lhs_hi rhs_hi + (nzcv false false false false) (Cond.Eq) cmp_inst))) + +(rule (lower_icmp (IntCC.Equal) lhs rhs $I128) + (flags_and_cc (lower_icmp_i128_eq_ne lhs rhs) (IntCC.Equal))) +(rule (lower_icmp (IntCC.NotEqual) lhs rhs $I128) + (flags_and_cc (lower_icmp_i128_eq_ne lhs rhs) (IntCC.NotEqual))) + +;; cmp lhs_lo, rhs_lo +;; cset tmp1, unsigned_cond +;; cmp lhs_hi, rhs_hi +;; cset tmp2, cond +;; csel dst, tmp1, tmp2, eq +(rule -1 (lower_icmp_into_reg cond lhs rhs $I128 $I8) + (let ((unsigned_cond Cond (cond_code (intcc_unsigned cond))) + (cond Cond (cond_code cond)) + (lhs ValueRegs (put_in_regs lhs)) + (rhs ValueRegs (put_in_regs rhs)) + (lhs_lo Reg (value_regs_get lhs 0)) + (lhs_hi Reg (value_regs_get lhs 1)) + (rhs_lo Reg (value_regs_get rhs 0)) + (rhs_hi Reg (value_regs_get rhs 1)) + (tmp1 Reg (with_flags_reg (cmp (OperandSize.Size64) lhs_lo rhs_lo) + (materialize_bool_result unsigned_cond)))) + (with_flags (cmp (OperandSize.Size64) lhs_hi rhs_hi) + (lower_icmp_i128_consumer cond tmp1)))) + +(decl lower_icmp_i128_consumer (Cond Reg) ConsumesFlags) +(rule (lower_icmp_i128_consumer cond tmp1) + (let ((tmp2 WritableReg (temp_writable_reg $I64)) + (dst WritableReg (temp_writable_reg $I64))) + (ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs + (MInst.CSet tmp2 cond) + (MInst.CSel dst (Cond.Eq) tmp1 tmp2) + (value_reg dst)))) + +(decl lower_bmask (Type Type ValueRegs) ValueRegs) + + +;; For conversions that exactly fit a register, we can use csetm. +;; +;; cmp val, #0 +;; csetm res, ne +(rule 0 + (lower_bmask (fits_in_64 _) (ty_32_or_64 in_ty) val) + (with_flags_reg + (cmp_imm (operand_size in_ty) (value_regs_get val 0) (u8_into_imm12 0)) + (csetm (Cond.Ne)))) + +;; For conversions from a 128-bit value into a 64-bit or smaller one, we or the +;; two registers of the 128-bit value together, and then recurse with the +;; combined value as a 64-bit test. +;; +;; orr val, lo, hi +;; cmp val, #0 +;; csetm res, ne +(rule 1 + (lower_bmask (fits_in_64 ty) $I128 val) + (let ((lo Reg (value_regs_get val 0)) + (hi Reg (value_regs_get val 1)) + (combined Reg (orr $I64 lo hi))) + (lower_bmask ty $I64 (value_reg combined)))) + +;; For converting from any type into i128, duplicate the result of +;; converting to i64. +(rule 2 + (lower_bmask $I128 in_ty val) + (let ((res ValueRegs (lower_bmask $I64 in_ty val)) + (res Reg (value_regs_get res 0))) + (value_regs res res))) + +;; For conversions smaller than a register, we need to mask off the high bits, and then +;; we can recurse into the general case. +;; +;; and tmp, val, #ty_mask +;; cmp tmp, #0 +;; csetm res, ne +(rule 3 + (lower_bmask out_ty (fits_in_16 in_ty) val) + ; This if-let can't fail due to ty_mask always producing 8/16 consecutive 1s. 
+ (if-let mask_bits (imm_logic_from_u64 $I32 (ty_mask in_ty))) + (let ((masked Reg (and_imm $I32 (value_regs_get val 0) mask_bits))) + (lower_bmask out_ty $I32 masked))) + +;; Exceptional `lower_icmp_into_flags` rules. +;; We need to guarantee that the flags for `cond` are correct, so we +;; compare `dst` with 1. +(rule (lower_icmp_into_flags cond @ (IntCC.SignedGreaterThanOrEqual) lhs rhs $I128) + (let ((dst ValueRegs (lower_icmp_into_reg cond lhs rhs $I128 $I8)) + (dst Reg (value_regs_get dst 0)) + (tmp Reg (imm $I64 (ImmExtend.Sign) 1))) ;; mov tmp, #1 + (flags_and_cc (cmp (OperandSize.Size64) dst tmp) cond))) +(rule (lower_icmp_into_flags cond @ (IntCC.UnsignedGreaterThanOrEqual) lhs rhs $I128) + (let ((dst ValueRegs (lower_icmp_into_reg cond lhs rhs $I128 $I8)) + (dst Reg (value_regs_get dst 0)) + (tmp Reg (imm $I64 (ImmExtend.Zero) 1))) + (flags_and_cc (cmp (OperandSize.Size64) dst tmp) cond))) +(rule (lower_icmp_into_flags cond @ (IntCC.SignedLessThanOrEqual) lhs rhs $I128) + (let ((dst ValueRegs (lower_icmp_into_reg cond lhs rhs $I128 $I8)) + (dst Reg (value_regs_get dst 0)) + (tmp Reg (imm $I64 (ImmExtend.Sign) 1))) + (flags_and_cc (cmp (OperandSize.Size64) tmp dst) cond))) +(rule (lower_icmp_into_flags cond @ (IntCC.UnsignedLessThanOrEqual) lhs rhs $I128) + (let ((dst ValueRegs (lower_icmp_into_reg cond lhs rhs $I128 $I8)) + (dst Reg (value_regs_get dst 0)) + (tmp Reg (imm $I64 (ImmExtend.Zero) 1))) + (flags_and_cc (cmp (OperandSize.Size64) tmp dst) cond))) +;; For strict comparisons, we compare with 0. +(rule (lower_icmp_into_flags cond @ (IntCC.SignedGreaterThan) lhs rhs $I128) + (let ((dst ValueRegs (lower_icmp_into_reg cond lhs rhs $I128 $I8)) + (dst Reg (value_regs_get dst 0))) + (flags_and_cc (cmp (OperandSize.Size64) dst (zero_reg)) cond))) +(rule (lower_icmp_into_flags cond @ (IntCC.UnsignedGreaterThan) lhs rhs $I128) + (let ((dst ValueRegs (lower_icmp_into_reg cond lhs rhs $I128 $I8)) + (dst Reg (value_regs_get dst 0))) + (flags_and_cc (cmp (OperandSize.Size64) dst (zero_reg)) cond))) +(rule (lower_icmp_into_flags cond @ (IntCC.SignedLessThan) lhs rhs $I128) + (let ((dst ValueRegs (lower_icmp_into_reg cond lhs rhs $I128 $I8)) + (dst Reg (value_regs_get dst 0))) + (flags_and_cc (cmp (OperandSize.Size64) (zero_reg) dst) cond))) +(rule (lower_icmp_into_flags cond @ (IntCC.UnsignedLessThan) lhs rhs $I128) + (let ((dst ValueRegs (lower_icmp_into_reg cond lhs rhs $I128 $I8)) + (dst Reg (value_regs_get dst 0))) + (flags_and_cc (cmp (OperandSize.Size64) (zero_reg) dst) cond))) + +;; Helpers for generating select instruction sequences. 
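+;;
+;; Roughly: scalar integers select with a single `csel`, scalar floats with
+;; `fcsel`, 128-bit vectors with a vector select (64-bit vectors reuse the
+;; 64-bit `fpu_csel`), and $I128 values with two `csel`s -- one per 64-bit
+;; half -- consuming the same flags.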
+(decl lower_select (ProducesFlags Cond Type Value Value) ValueRegs) +(rule 2 (lower_select flags cond (ty_scalar_float (fits_in_64 ty)) rn rm) + (with_flags flags (fpu_csel ty cond rn rm))) +(rule 4 (lower_select flags cond $F128 rn rm) + (with_flags flags (vec_csel cond rn rm))) +(rule 3 (lower_select flags cond (ty_vec128 ty) rn rm) + (with_flags flags (vec_csel cond rn rm))) +(rule (lower_select flags cond ty rn rm) + (if (ty_vec64 ty)) + (with_flags flags (fpu_csel $F64 cond rn rm))) +(rule 4 (lower_select flags cond $I128 rn rm) + (let ((dst_lo WritableReg (temp_writable_reg $I64)) + (dst_hi WritableReg (temp_writable_reg $I64)) + (rn ValueRegs (put_in_regs rn)) + (rm ValueRegs (put_in_regs rm)) + (rn_lo Reg (value_regs_get rn 0)) + (rn_hi Reg (value_regs_get rn 1)) + (rm_lo Reg (value_regs_get rm 0)) + (rm_hi Reg (value_regs_get rm 1))) + (with_flags flags + (ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs + (MInst.CSel dst_lo cond rn_lo rm_lo) + (MInst.CSel dst_hi cond rn_hi rm_hi) + (value_regs dst_lo dst_hi))))) +(rule 1 (lower_select flags cond ty rn rm) + (if (ty_int_ref_scalar_64 ty)) + (with_flags flags (csel cond rn rm))) + +;; Helper for emitting `MInst.Jump` instructions. +(decl aarch64_jump (BranchTarget) SideEffectNoResult) +(rule (aarch64_jump target) + (SideEffectNoResult.Inst (MInst.Jump target))) + +;; Helper for emitting `MInst.JTSequence` instructions. +;; Emit the compound instruction that does: +;; +;; b.hs default +;; csel rB, xzr, rIndex, hs +;; csdb +;; adr rA, jt +;; ldrsw rB, [rA, rB, uxtw #2] +;; add rA, rA, rB +;; br rA +;; [jt entries] +;; +;; This must be *one* instruction in the vcode because +;; we cannot allow regalloc to insert any spills/fills +;; in the middle of the sequence; otherwise, the ADR's +;; PC-rel offset to the jumptable would be incorrect. +;; (The alternative is to introduce a relocation pass +;; for inlined jumptables, which is much worse, IMHO.) +(decl jt_sequence (Reg MachLabel BoxVecMachLabel) ConsumesFlags) +(rule (jt_sequence ridx default targets) + (let ((rtmp1 WritableReg (temp_writable_reg $I64)) + (rtmp2 WritableReg (temp_writable_reg $I64))) + (ConsumesFlags.ConsumesFlagsSideEffect + (MInst.JTSequence default targets ridx rtmp1 rtmp2)))) + +;; Helper for emitting `MInst.CondBr` instructions. +(decl cond_br (BranchTarget BranchTarget CondBrKind) ConsumesFlags) +(rule (cond_br taken not_taken kind) + (ConsumesFlags.ConsumesFlagsSideEffect + (MInst.CondBr taken not_taken kind))) + +;; Helper for emitting `MInst.TestBitAndBranch` instructions. +(decl test_branch (TestBitAndBranchKind BranchTarget BranchTarget Reg u8) SideEffectNoResult) +(rule (test_branch kind taken not_taken rn bit) + (SideEffectNoResult.Inst (MInst.TestBitAndBranch kind taken not_taken rn bit))) + +;; Helper for emitting `tbnz` instructions. +(decl tbnz (BranchTarget BranchTarget Reg u8) SideEffectNoResult) +(rule (tbnz taken not_taken rn bit) + (test_branch (TestBitAndBranchKind.NZ) taken not_taken rn bit)) + +;; Helper for emitting `tbz` instructions. +(decl tbz (BranchTarget BranchTarget Reg u8) SideEffectNoResult) +(rule (tbz taken not_taken rn bit) + (test_branch (TestBitAndBranchKind.Z) taken not_taken rn bit)) + +;; Helper for emitting `MInst.MovToNZCV` instructions. +(decl mov_to_nzcv (Reg) ProducesFlags) +(rule (mov_to_nzcv rn) + (ProducesFlags.ProducesFlagsSideEffect + (MInst.MovToNZCV rn))) + +;; Helper for emitting `MInst.EmitIsland` instructions. 
+(decl emit_island (CodeOffset) SideEffectNoResult) +(rule (emit_island needed_space) + (SideEffectNoResult.Inst + (MInst.EmitIsland needed_space))) + +;; Helper for emitting `br_table` sequences. +(decl br_table_impl (u64 Reg MachLabel BoxVecMachLabel) Unit) +(rule (br_table_impl (imm12_from_u64 jt_size) ridx default targets) + (emit_side_effect (with_flags_side_effect + (cmp_imm (OperandSize.Size32) ridx jt_size) + (jt_sequence ridx default targets)))) +(rule -1 (br_table_impl jt_size ridx default targets) + (let ((jt_size Reg (imm $I64 (ImmExtend.Zero) jt_size))) + (emit_side_effect (with_flags_side_effect + (cmp (OperandSize.Size32) ridx jt_size) + (jt_sequence ridx default targets))))) + +;; Helper for emitting the `uzp1` instruction +(decl vec_uzp1 (Reg Reg VectorSize) Reg) +(rule (vec_uzp1 rn rm size) (vec_rrr (VecALUOp.Uzp1) rn rm size)) + +;; Helper for emitting the `uzp2` instruction +(decl vec_uzp2 (Reg Reg VectorSize) Reg) +(rule (vec_uzp2 rn rm size) (vec_rrr (VecALUOp.Uzp2) rn rm size)) + +;; Helper for emitting the `zip1` instruction +(decl vec_zip1 (Reg Reg VectorSize) Reg) +(rule (vec_zip1 rn rm size) (vec_rrr (VecALUOp.Zip1) rn rm size)) + +;; Helper for emitting the `zip2` instruction +(decl vec_zip2 (Reg Reg VectorSize) Reg) +(rule (vec_zip2 rn rm size) (vec_rrr (VecALUOp.Zip2) rn rm size)) + +;; Helper for emitting the `trn1` instruction +(decl vec_trn1 (Reg Reg VectorSize) Reg) +(rule (vec_trn1 rn rm size) (vec_rrr (VecALUOp.Trn1) rn rm size)) + +;; Helper for emitting the `trn2` instruction +(decl vec_trn2 (Reg Reg VectorSize) Reg) +(rule (vec_trn2 rn rm size) (vec_rrr (VecALUOp.Trn2) rn rm size)) + +;; Helper for creating a zero value `ASIMDMovModImm` immediate. +(decl asimd_mov_mod_imm_zero (ScalarSize) ASIMDMovModImm) +(extern constructor asimd_mov_mod_imm_zero asimd_mov_mod_imm_zero) + +;; Helper for fallibly creating an `ASIMDMovModImm` immediate from its parts. +(decl pure partial asimd_mov_mod_imm_from_u64 (u64 ScalarSize) ASIMDMovModImm) +(extern constructor asimd_mov_mod_imm_from_u64 asimd_mov_mod_imm_from_u64) + +;; Helper for fallibly creating an `ASIMDFPModImm` immediate from its parts. +(decl pure partial asimd_fp_mod_imm_from_u64 (u64 ScalarSize) ASIMDFPModImm) +(extern constructor asimd_fp_mod_imm_from_u64 asimd_fp_mod_imm_from_u64) + +;; Helper for creating a `VecDupFPImm` instruction +(decl vec_dup_fp_imm (ASIMDFPModImm VectorSize) Reg) +(rule (vec_dup_fp_imm imm size) + (let ((dst WritableReg (temp_writable_reg $I8X16)) + (_ Unit (emit (MInst.VecDupFPImm dst imm size)))) + dst)) + +;; Helper for creating a `FpuLoad64` instruction +(decl fpu_load64 (AMode MemFlags) Reg) +(rule (fpu_load64 amode flags) + (let ((dst WritableReg (temp_writable_reg $I8X16)) + (_ Unit (emit (MInst.FpuLoad64 dst amode flags)))) + dst)) + +;; Helper for creating a `FpuLoad128` instruction +(decl fpu_load128 (AMode MemFlags) Reg) +(rule (fpu_load128 amode flags) + (let ((dst WritableReg (temp_writable_reg $I8X16)) + (_ Unit (emit (MInst.FpuLoad128 dst amode flags)))) + dst)) diff --git a/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/isa/aarch64/inst/args.rs b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/isa/aarch64/inst/args.rs new file mode 100644 index 000000000..ee6e885d7 --- /dev/null +++ b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/isa/aarch64/inst/args.rs @@ -0,0 +1,711 @@ +//! AArch64 ISA definitions: instruction arguments. 
+ +use crate::ir::types::*; +use crate::isa::aarch64::inst::*; + +//============================================================================= +// Instruction sub-components: shift and extend descriptors + +/// A shift operator for a register or immediate. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[repr(u8)] +pub enum ShiftOp { + /// Logical shift left. + LSL = 0b00, + /// Logical shift right. + LSR = 0b01, + /// Arithmetic shift right. + ASR = 0b10, + /// Rotate right. + ROR = 0b11, +} + +impl ShiftOp { + /// Get the encoding of this shift op. + pub fn bits(self) -> u8 { + self as u8 + } +} + +/// A shift operator amount. +#[derive(Clone, Copy, Debug)] +pub struct ShiftOpShiftImm(u8); + +impl ShiftOpShiftImm { + /// Maximum shift for shifted-register operands. + pub const MAX_SHIFT: u64 = 63; + + /// Create a new shiftop shift amount, if possible. + pub fn maybe_from_shift(shift: u64) -> Option { + if shift <= Self::MAX_SHIFT { + Some(ShiftOpShiftImm(shift as u8)) + } else { + None + } + } + + /// Return the shift amount. + pub fn value(self) -> u8 { + self.0 + } + + /// Mask down to a given number of bits. + pub fn mask(self, bits: u8) -> ShiftOpShiftImm { + ShiftOpShiftImm(self.0 & (bits - 1)) + } +} + +/// A shift operator with an amount, guaranteed to be within range. +#[derive(Copy, Clone, Debug)] +pub struct ShiftOpAndAmt { + /// The shift operator. + op: ShiftOp, + /// The shift operator amount. + shift: ShiftOpShiftImm, +} + +impl ShiftOpAndAmt { + /// Create a new shift operator with an amount. + pub fn new(op: ShiftOp, shift: ShiftOpShiftImm) -> ShiftOpAndAmt { + ShiftOpAndAmt { op, shift } + } + + /// Get the shift op. + pub fn op(&self) -> ShiftOp { + self.op + } + + /// Get the shift amount. + pub fn amt(&self) -> ShiftOpShiftImm { + self.shift + } +} + +/// An extend operator for a register. +#[derive(Clone, Copy, Debug)] +#[repr(u8)] +pub enum ExtendOp { + /// Unsigned extend byte. + UXTB = 0b000, + /// Unsigned extend halfword. + UXTH = 0b001, + /// Unsigned extend word. + UXTW = 0b010, + /// Unsigned extend doubleword. + UXTX = 0b011, + /// Signed extend byte. + SXTB = 0b100, + /// Signed extend halfword. + SXTH = 0b101, + /// Signed extend word. + SXTW = 0b110, + /// Signed extend doubleword. + SXTX = 0b111, +} + +impl ExtendOp { + /// Encoding of this op. + pub fn bits(self) -> u8 { + self as u8 + } +} + +//============================================================================= +// Instruction sub-components (memory addresses): definitions + +/// A reference to some memory address. +#[derive(Clone, Debug)] +pub enum MemLabel { + /// An address in the code, a constant pool or jumptable, with relative + /// offset from this instruction. This form must be used at emission time; + /// see `memlabel_finalize()` for how other forms are lowered to this one. + PCRel(i32), + /// An address that refers to a label within a `MachBuffer`, for example a + /// constant that lives in the pool at the end of the function. + Mach(MachLabel), +} + +impl AMode { + /// Memory reference using an address in a register. + pub fn reg(reg: Reg) -> AMode { + // Use UnsignedOffset rather than Unscaled to use ldr rather than ldur. + // This also does not use PostIndexed / PreIndexed as they update the register. + AMode::UnsignedOffset { + rn: reg, + uimm12: UImm12Scaled::zero(I64), + } + } + + /// Memory reference using `reg1 + sizeof(ty) * reg2` as an address, with `reg2` sign- or + /// zero-extended as per `op`. 
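+ /// For example, with `op = ExtendOp::SXTW` and an I64 access this is the
+ /// `[xN, wM, SXTW #3]` addressing form, where the shift amount is
+ /// `log2(sizeof(ty))`.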
+ pub fn reg_plus_reg_scaled_extended(reg1: Reg, reg2: Reg, op: ExtendOp) -> AMode { + AMode::RegScaledExtended { + rn: reg1, + rm: reg2, + extendop: op, + } + } +} + +pub use crate::isa::aarch64::lower::isle::generated_code::PairAMode; + +//============================================================================= +// Instruction sub-components (conditions, branches and branch targets): +// definitions + +/// Condition for conditional branches. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[repr(u8)] +pub enum Cond { + /// Equal. + Eq = 0, + /// Not equal. + Ne = 1, + /// Unsigned greater than or equal to. + Hs = 2, + /// Unsigned less than. + Lo = 3, + /// Minus, negative. + Mi = 4, + /// Positive or zero. + Pl = 5, + /// Signed overflow. + Vs = 6, + /// No signed overflow. + Vc = 7, + /// Unsigned greater than. + Hi = 8, + /// Unsigned less than or equal to. + Ls = 9, + /// Signed greater or equal to. + Ge = 10, + /// Signed less than. + Lt = 11, + /// Signed greater than. + Gt = 12, + /// Signed less than or equal. + Le = 13, + /// Always executed. + Al = 14, + /// Always executed. + Nv = 15, +} + +impl Cond { + /// Return the inverted condition. + pub fn invert(self) -> Cond { + match self { + Cond::Eq => Cond::Ne, + Cond::Ne => Cond::Eq, + + Cond::Hs => Cond::Lo, + Cond::Lo => Cond::Hs, + + Cond::Mi => Cond::Pl, + Cond::Pl => Cond::Mi, + + Cond::Vs => Cond::Vc, + Cond::Vc => Cond::Vs, + + Cond::Hi => Cond::Ls, + Cond::Ls => Cond::Hi, + + Cond::Ge => Cond::Lt, + Cond::Lt => Cond::Ge, + + Cond::Gt => Cond::Le, + Cond::Le => Cond::Gt, + + Cond::Al => Cond::Nv, + Cond::Nv => Cond::Al, + } + } + + /// Return the machine encoding of this condition. + pub fn bits(self) -> u32 { + self as u32 + } +} + +/// The kind of conditional branch: the common-case-optimized "reg-is-zero" / +/// "reg-is-nonzero" variants, or the generic one that tests the machine +/// condition codes. +#[derive(Clone, Copy, Debug)] +pub enum CondBrKind { + /// Condition: given register is zero. + Zero(Reg, OperandSize), + /// Condition: given register is nonzero. + NotZero(Reg, OperandSize), + /// Condition: the given condition-code test is true. + Cond(Cond), +} + +impl CondBrKind { + /// Return the inverted branch condition. + pub fn invert(self) -> CondBrKind { + match self { + CondBrKind::Zero(reg, size) => CondBrKind::NotZero(reg, size), + CondBrKind::NotZero(reg, size) => CondBrKind::Zero(reg, size), + CondBrKind::Cond(c) => CondBrKind::Cond(c.invert()), + } + } +} + +/// A branch target. Either unresolved (basic-block index) or resolved (offset +/// from end of current instruction). +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum BranchTarget { + /// An unresolved reference to a Label, as passed into + /// `lower_branch_group()`. + Label(MachLabel), + /// A fixed PC offset. + ResolvedOffset(i32), +} + +impl BranchTarget { + /// Return the target's label, if it is a label-based target. + pub fn as_label(self) -> Option { + match self { + BranchTarget::Label(l) => Some(l), + _ => None, + } + } + + /// Return the target's offset, if specified, or zero if label-based. + pub fn as_offset14_or_zero(self) -> u32 { + self.as_offset_bounded(14) + } + + /// Return the target's offset, if specified, or zero if label-based. + pub fn as_offset19_or_zero(self) -> u32 { + self.as_offset_bounded(19) + } + + /// Return the target's offset, if specified, or zero if label-based. 
+ pub fn as_offset26_or_zero(self) -> u32 { + self.as_offset_bounded(26) + } + + fn as_offset_bounded(self, bits: u32) -> u32 { + let off = match self { + BranchTarget::ResolvedOffset(off) => off >> 2, + _ => 0, + }; + let hi = (1 << (bits - 1)) - 1; + let lo = -(1 << bits - 1); + assert!(off <= hi); + assert!(off >= lo); + (off as u32) & ((1 << bits) - 1) + } +} + +impl PrettyPrint for ShiftOpAndAmt { + fn pretty_print(&self, _: u8) -> String { + format!("{:?} {}", self.op(), self.amt().value()) + } +} + +impl PrettyPrint for ExtendOp { + fn pretty_print(&self, _: u8) -> String { + format!("{self:?}") + } +} + +impl PrettyPrint for MemLabel { + fn pretty_print(&self, _: u8) -> String { + match self { + MemLabel::PCRel(off) => format!("pc+{off}"), + MemLabel::Mach(off) => format!("label({})", off.as_u32()), + } + } +} + +fn shift_for_type(size_bytes: u8) -> usize { + match size_bytes { + 1 => 0, + 2 => 1, + 4 => 2, + 8 => 3, + 16 => 4, + _ => panic!("unknown type size: {size_bytes}"), + } +} + +impl PrettyPrint for AMode { + fn pretty_print(&self, size_bytes: u8) -> String { + debug_assert!(size_bytes != 0); + match self { + &AMode::Unscaled { rn, simm9 } => { + let reg = pretty_print_reg(rn); + if simm9.value != 0 { + let simm9 = simm9.pretty_print(8); + format!("[{reg}, {simm9}]") + } else { + format!("[{reg}]") + } + } + &AMode::UnsignedOffset { rn, uimm12 } => { + let reg = pretty_print_reg(rn); + if uimm12.value() != 0 { + let uimm12 = uimm12.pretty_print(8); + format!("[{reg}, {uimm12}]") + } else { + format!("[{reg}]") + } + } + &AMode::RegReg { rn, rm } => { + let r1 = pretty_print_reg(rn); + let r2 = pretty_print_reg(rm); + format!("[{r1}, {r2}]") + } + &AMode::RegScaled { rn, rm } => { + let r1 = pretty_print_reg(rn); + let r2 = pretty_print_reg(rm); + let shift = shift_for_type(size_bytes); + format!("[{r1}, {r2}, LSL #{shift}]") + } + &AMode::RegScaledExtended { rn, rm, extendop } => { + let shift = shift_for_type(size_bytes); + let size = match extendop { + ExtendOp::SXTW | ExtendOp::UXTW => OperandSize::Size32, + _ => OperandSize::Size64, + }; + let r1 = pretty_print_reg(rn); + let r2 = pretty_print_ireg(rm, size); + let op = extendop.pretty_print(0); + format!("[{r1}, {r2}, {op} #{shift}]") + } + &AMode::RegExtended { rn, rm, extendop } => { + let size = match extendop { + ExtendOp::SXTW | ExtendOp::UXTW => OperandSize::Size32, + _ => OperandSize::Size64, + }; + let r1 = pretty_print_reg(rn); + let r2 = pretty_print_ireg(rm, size); + let op = extendop.pretty_print(0); + format!("[{r1}, {r2}, {op}]") + } + &AMode::Label { ref label } => label.pretty_print(0), + &AMode::SPPreIndexed { simm9 } => { + let simm9 = simm9.pretty_print(8); + format!("[sp, {simm9}]!") + } + &AMode::SPPostIndexed { simm9 } => { + let simm9 = simm9.pretty_print(8); + format!("[sp], {simm9}") + } + AMode::Const { addr } => format!("[const({})]", addr.as_u32()), + + // Eliminated by `mem_finalize()`. + &AMode::SPOffset { .. } + | &AMode::FPOffset { .. } + | &AMode::IncomingArg { .. } + | &AMode::SlotOffset { .. } + | &AMode::RegOffset { .. 
} => { + panic!("Unexpected pseudo mem-arg mode: {self:?}") + } + } + } +} + +impl PrettyPrint for PairAMode { + fn pretty_print(&self, _: u8) -> String { + match self { + &PairAMode::SignedOffset { reg, simm7 } => { + let reg = pretty_print_reg(reg); + if simm7.value != 0 { + let simm7 = simm7.pretty_print(8); + format!("[{reg}, {simm7}]") + } else { + format!("[{reg}]") + } + } + &PairAMode::SPPreIndexed { simm7 } => { + let simm7 = simm7.pretty_print(8); + format!("[sp, {simm7}]!") + } + &PairAMode::SPPostIndexed { simm7 } => { + let simm7 = simm7.pretty_print(8); + format!("[sp], {simm7}") + } + } + } +} + +impl PrettyPrint for Cond { + fn pretty_print(&self, _: u8) -> String { + let mut s = format!("{self:?}"); + s.make_ascii_lowercase(); + s + } +} + +impl PrettyPrint for BranchTarget { + fn pretty_print(&self, _: u8) -> String { + match self { + &BranchTarget::Label(label) => format!("label{:?}", label.as_u32()), + &BranchTarget::ResolvedOffset(off) => format!("{off}"), + } + } +} + +/// Type used to communicate the operand size of a machine instruction, as AArch64 has 32- and +/// 64-bit variants of many instructions (and integer registers). +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum OperandSize { + /// 32-bit. + Size32, + /// 64-bit. + Size64, +} + +impl OperandSize { + /// 32-bit case? + pub fn is32(self) -> bool { + self == OperandSize::Size32 + } + + /// 64-bit case? + pub fn is64(self) -> bool { + self == OperandSize::Size64 + } + + /// Convert from a needed width to the smallest size that fits. + pub fn from_bits>(bits: I) -> OperandSize { + let bits: usize = bits.into(); + assert!(bits <= 64); + if bits <= 32 { + OperandSize::Size32 + } else { + OperandSize::Size64 + } + } + + /// Return the operand size in bits. + pub fn bits(&self) -> u8 { + match self { + OperandSize::Size32 => 32, + OperandSize::Size64 => 64, + } + } + + /// Convert from an integer type into the smallest size that fits. + pub fn from_ty(ty: Type) -> OperandSize { + debug_assert!(!ty.is_vector()); + + Self::from_bits(ty_bits(ty)) + } + + /// Convert to I32, I64, or I128. + pub fn to_ty(self) -> Type { + match self { + OperandSize::Size32 => I32, + OperandSize::Size64 => I64, + } + } + + /// Register interpretation bit. + /// When 0, the register is interpreted as the 32-bit version. + /// When 1, the register is interpreted as the 64-bit version. + pub fn sf_bit(&self) -> u32 { + match self { + OperandSize::Size32 => 0, + OperandSize::Size64 => 1, + } + } + + /// The maximum unsigned value representable in a value of this size. + pub fn max_value(&self) -> u64 { + match self { + OperandSize::Size32 => u32::MAX as u64, + OperandSize::Size64 => u64::MAX, + } + } +} + +/// Type used to communicate the size of a scalar SIMD & FP operand. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum ScalarSize { + /// 8-bit. + Size8, + /// 16-bit. + Size16, + /// 32-bit. + Size32, + /// 64-bit. + Size64, + /// 128-bit. + Size128, +} + +impl ScalarSize { + /// Convert to an integer operand size. + pub fn operand_size(&self) -> OperandSize { + match self { + ScalarSize::Size8 | ScalarSize::Size16 | ScalarSize::Size32 => OperandSize::Size32, + ScalarSize::Size64 => OperandSize::Size64, + _ => panic!("Unexpected operand_size request for: {self:?}"), + } + } + + /// Return the encoding bits that are used by some scalar FP instructions + /// for a particular operand size. 
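+ /// (`0b00` selects single precision, `0b01` double precision and `0b11`
+ /// half precision, matching the scalar FP `ftype` field in the AArch64
+ /// encoding.)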
+ pub fn ftype(&self) -> u32 { + match self { + ScalarSize::Size16 => 0b11, + ScalarSize::Size32 => 0b00, + ScalarSize::Size64 => 0b01, + _ => panic!("Unexpected scalar FP operand size: {self:?}"), + } + } + + /// Return the widened version of the scalar size. + pub fn widen(&self) -> ScalarSize { + match self { + ScalarSize::Size8 => ScalarSize::Size16, + ScalarSize::Size16 => ScalarSize::Size32, + ScalarSize::Size32 => ScalarSize::Size64, + ScalarSize::Size64 => ScalarSize::Size128, + ScalarSize::Size128 => panic!("can't widen 128-bits"), + } + } + + /// Return the narrowed version of the scalar size. + pub fn narrow(&self) -> ScalarSize { + match self { + ScalarSize::Size8 => panic!("can't narrow 8-bits"), + ScalarSize::Size16 => ScalarSize::Size8, + ScalarSize::Size32 => ScalarSize::Size16, + ScalarSize::Size64 => ScalarSize::Size32, + ScalarSize::Size128 => ScalarSize::Size64, + } + } + + /// Return a type with the same size as this scalar. + pub fn ty(&self) -> Type { + match self { + ScalarSize::Size8 => I8, + ScalarSize::Size16 => I16, + ScalarSize::Size32 => I32, + ScalarSize::Size64 => I64, + ScalarSize::Size128 => I128, + } + } +} + +/// Type used to communicate the size of a vector operand. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum VectorSize { + /// 8-bit, 8 lanes. + Size8x8, + /// 8 bit, 16 lanes. + Size8x16, + /// 16-bit, 4 lanes. + Size16x4, + /// 16-bit, 8 lanes. + Size16x8, + /// 32-bit, 2 lanes. + Size32x2, + /// 32-bit, 4 lanes. + Size32x4, + /// 64-bit, 2 lanes. + Size64x2, +} + +impl VectorSize { + /// Get the vector operand size with the given scalar size as lane size. + pub fn from_lane_size(size: ScalarSize, is_128bit: bool) -> VectorSize { + match (size, is_128bit) { + (ScalarSize::Size8, false) => VectorSize::Size8x8, + (ScalarSize::Size8, true) => VectorSize::Size8x16, + (ScalarSize::Size16, false) => VectorSize::Size16x4, + (ScalarSize::Size16, true) => VectorSize::Size16x8, + (ScalarSize::Size32, false) => VectorSize::Size32x2, + (ScalarSize::Size32, true) => VectorSize::Size32x4, + (ScalarSize::Size64, true) => VectorSize::Size64x2, + _ => panic!("Unexpected scalar FP operand size: {size:?}"), + } + } + + /// Get the integer operand size that corresponds to a lane of a vector with a certain size. + pub fn operand_size(&self) -> OperandSize { + match self { + VectorSize::Size64x2 => OperandSize::Size64, + _ => OperandSize::Size32, + } + } + + /// Get the scalar operand size that corresponds to a lane of a vector with a certain size. + pub fn lane_size(&self) -> ScalarSize { + match self { + VectorSize::Size8x8 | VectorSize::Size8x16 => ScalarSize::Size8, + VectorSize::Size16x4 | VectorSize::Size16x8 => ScalarSize::Size16, + VectorSize::Size32x2 | VectorSize::Size32x4 => ScalarSize::Size32, + VectorSize::Size64x2 => ScalarSize::Size64, + } + } + + /// Returns true if the VectorSize is 128-bits. + pub fn is_128bits(&self) -> bool { + match self { + VectorSize::Size8x8 => false, + VectorSize::Size8x16 => true, + VectorSize::Size16x4 => false, + VectorSize::Size16x8 => true, + VectorSize::Size32x2 => false, + VectorSize::Size32x4 => true, + VectorSize::Size64x2 => true, + } + } + + /// Return the encoding bits that are used by some SIMD instructions + /// for a particular operand size. 
+ pub fn enc_size(&self) -> (u32, u32) { + let q = self.is_128bits() as u32; + let size = match self.lane_size() { + ScalarSize::Size8 => 0b00, + ScalarSize::Size16 => 0b01, + ScalarSize::Size32 => 0b10, + ScalarSize::Size64 => 0b11, + _ => unreachable!(), + }; + + (q, size) + } + + /// Return the encoding bit that is used by some floating-point SIMD + /// instructions for a particular operand size. + pub fn enc_float_size(&self) -> u32 { + match self.lane_size() { + ScalarSize::Size32 => 0b0, + ScalarSize::Size64 => 0b1, + size => panic!("Unsupported floating-point size for vector op: {size:?}"), + } + } +} + +impl APIKey { + /// Returns the encoding of the `auti{key}` instruction used to decrypt the + /// `lr` register. + pub fn enc_auti_hint(&self) -> u32 { + let (crm, op2) = match self { + APIKey::AZ => (0b0011, 0b100), + APIKey::ASP => (0b0011, 0b101), + APIKey::BZ => (0b0011, 0b110), + APIKey::BSP => (0b0011, 0b111), + }; + 0xd503201f | (crm << 8) | (op2 << 5) + } +} + +pub use crate::isa::aarch64::lower::isle::generated_code::TestBitAndBranchKind; + +impl TestBitAndBranchKind { + /// Complements this branch condition to act on the opposite result. + pub fn complement(&self) -> TestBitAndBranchKind { + match self { + TestBitAndBranchKind::Z => TestBitAndBranchKind::NZ, + TestBitAndBranchKind::NZ => TestBitAndBranchKind::Z, + } + } +} diff --git a/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/isa/aarch64/inst/emit.rs b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/isa/aarch64/inst/emit.rs new file mode 100644 index 000000000..adc7396c4 --- /dev/null +++ b/collector/compile-benchmarks/cranelift-codegen-0.119.0/src/isa/aarch64/inst/emit.rs @@ -0,0 +1,3578 @@ +//! AArch64 ISA: binary code emission. + +use cranelift_control::ControlPlane; + +use crate::ir::{self, types::*}; +use crate::isa::aarch64::inst::*; +use crate::trace; + +/// Memory addressing mode finalization: convert "special" modes (e.g., +/// generic arbitrary stack offset) into real addressing modes, possibly by +/// emitting some helper instructions that come immediately before the use +/// of this amode. +pub fn mem_finalize( + sink: Option<&mut MachBuffer>, + mem: &AMode, + access_ty: Type, + state: &EmitState, +) -> (SmallVec<[Inst; 4]>, AMode) { + match mem { + &AMode::RegOffset { off, .. } + | &AMode::SPOffset { off } + | &AMode::FPOffset { off } + | &AMode::IncomingArg { off } + | &AMode::SlotOffset { off } => { + let basereg = match mem { + &AMode::RegOffset { rn, .. } => rn, + &AMode::SPOffset { .. } + | &AMode::SlotOffset { .. } + | &AMode::IncomingArg { .. } => stack_reg(), + &AMode::FPOffset { .. } => fp_reg(), + _ => unreachable!(), + }; + let off = match mem { + &AMode::IncomingArg { .. } => { + let frame_layout = state.frame_layout(); + i64::from( + frame_layout.setup_area_size + + frame_layout.tail_args_size + + frame_layout.clobber_size + + frame_layout.fixed_frame_storage_size + + frame_layout.outgoing_args_size, + ) - off + } + &AMode::SlotOffset { .. 
} => { + let adj = i64::from(state.frame_layout().outgoing_args_size); + trace!( + "mem_finalize: slot offset {} + adj {} -> {}", + off, + adj, + off + adj + ); + off + adj + } + _ => off, + }; + + if let Some(simm9) = SImm9::maybe_from_i64(off) { + let mem = AMode::Unscaled { rn: basereg, simm9 }; + (smallvec![], mem) + } else if let Some(uimm12) = UImm12Scaled::maybe_from_i64(off, access_ty) { + let mem = AMode::UnsignedOffset { + rn: basereg, + uimm12, + }; + (smallvec![], mem) + } else { + let tmp = writable_spilltmp_reg(); + ( + Inst::load_constant(tmp, off as u64, &mut |_| tmp), + AMode::RegExtended { + rn: basereg, + rm: tmp.to_reg(), + extendop: ExtendOp::SXTX, + }, + ) + } + } + + AMode::Const { addr } => { + let sink = match sink { + Some(sink) => sink, + None => return (smallvec![], mem.clone()), + }; + let label = sink.get_label_for_constant(*addr); + let label = MemLabel::Mach(label); + (smallvec![], AMode::Label { label }) + } + + _ => (smallvec![], mem.clone()), + } +} + +//============================================================================= +// Instructions and subcomponents: emission + +pub(crate) fn machreg_to_gpr(m: Reg) -> u32 { + assert_eq!(m.class(), RegClass::Int); + u32::from(m.to_real_reg().unwrap().hw_enc() & 31) +} + +pub(crate) fn machreg_to_vec(m: Reg) -> u32 { + assert_eq!(m.class(), RegClass::Float); + u32::from(m.to_real_reg().unwrap().hw_enc()) +} + +fn machreg_to_gpr_or_vec(m: Reg) -> u32 { + u32::from(m.to_real_reg().unwrap().hw_enc() & 31) +} + +pub(crate) fn enc_arith_rrr( + bits_31_21: u32, + bits_15_10: u32, + rd: Writable, + rn: Reg, + rm: Reg, +) -> u32 { + (bits_31_21 << 21) + | (bits_15_10 << 10) + | machreg_to_gpr(rd.to_reg()) + | (machreg_to_gpr(rn) << 5) + | (machreg_to_gpr(rm) << 16) +} + +fn enc_arith_rr_imm12( + bits_31_24: u32, + immshift: u32, + imm12: u32, + rn: Reg, + rd: Writable, +) -> u32 { + (bits_31_24 << 24) + | (immshift << 22) + | (imm12 << 10) + | (machreg_to_gpr(rn) << 5) + | machreg_to_gpr(rd.to_reg()) +} + +fn enc_arith_rr_imml(bits_31_23: u32, imm_bits: u32, rn: Reg, rd: Writable) -> u32 { + (bits_31_23 << 23) | (imm_bits << 10) | (machreg_to_gpr(rn) << 5) | machreg_to_gpr(rd.to_reg()) +} + +fn enc_arith_rrrr(top11: u32, rm: Reg, bit15: u32, ra: Reg, rn: Reg, rd: Writable) -> u32 { + (top11 << 21) + | (machreg_to_gpr(rm) << 16) + | (bit15 << 15) + | (machreg_to_gpr(ra) << 10) + | (machreg_to_gpr(rn) << 5) + | machreg_to_gpr(rd.to_reg()) +} + +fn enc_jump26(op_31_26: u32, off_26_0: u32) -> u32 { + assert!(off_26_0 < (1 << 26)); + (op_31_26 << 26) | off_26_0 +} + +fn enc_cmpbr(op_31_24: u32, off_18_0: u32, reg: Reg) -> u32 { + assert!(off_18_0 < (1 << 19)); + (op_31_24 << 24) | (off_18_0 << 5) | machreg_to_gpr(reg) +} + +fn enc_cbr(op_31_24: u32, off_18_0: u32, op_4: u32, cond: u32) -> u32 { + assert!(off_18_0 < (1 << 19)); + assert!(cond < (1 << 4)); + (op_31_24 << 24) | (off_18_0 << 5) | (op_4 << 4) | cond +} + +/// Set the size bit of an instruction. 
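+/// (Bit 31, the `sf` field, which selects between the 32-bit and 64-bit
+/// form of the instruction.)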
+fn enc_op_size(op: u32, size: OperandSize) -> u32 { + (op & !(1 << 31)) | (size.sf_bit() << 31) +} + +fn enc_conditional_br(taken: BranchTarget, kind: CondBrKind) -> u32 { + match kind { + CondBrKind::Zero(reg, size) => enc_op_size( + enc_cmpbr(0b0_011010_0, taken.as_offset19_or_zero(), reg), + size, + ), + CondBrKind::NotZero(reg, size) => enc_op_size( + enc_cmpbr(0b0_011010_1, taken.as_offset19_or_zero(), reg), + size, + ), + CondBrKind::Cond(c) => enc_cbr(0b01010100, taken.as_offset19_or_zero(), 0b0, c.bits()), + } +} + +fn enc_test_bit_and_branch( + kind: TestBitAndBranchKind, + taken: BranchTarget, + reg: Reg, + bit: u8, +) -> u32 { + assert!(bit < 64); + let op_31 = u32::from(bit >> 5); + let op_23_19 = u32::from(bit & 0b11111); + let op_30_24 = 0b0110110 + | match kind { + TestBitAndBranchKind::Z => 0, + TestBitAndBranchKind::NZ => 1, + }; + (op_31 << 31) + | (op_30_24 << 24) + | (op_23_19 << 19) + | (taken.as_offset14_or_zero() << 5) + | machreg_to_gpr(reg) +} + +fn enc_move_wide(op: MoveWideOp, rd: Writable, imm: MoveWideConst, size: OperandSize) -> u32 { + assert!(imm.shift <= 0b11); + let op = match op { + MoveWideOp::MovN => 0b00, + MoveWideOp::MovZ => 0b10, + }; + 0x12800000 + | size.sf_bit() << 31 + | op << 29 + | u32::from(imm.shift) << 21 + | u32::from(imm.bits) << 5 + | machreg_to_gpr(rd.to_reg()) +} + +fn enc_movk(rd: Writable, imm: MoveWideConst, size: OperandSize) -> u32 { + assert!(imm.shift <= 0b11); + 0x72800000 + | size.sf_bit() << 31 + | u32::from(imm.shift) << 21 + | u32::from(imm.bits) << 5 + | machreg_to_gpr(rd.to_reg()) +} + +fn enc_ldst_pair(op_31_22: u32, simm7: SImm7Scaled, rn: Reg, rt: Reg, rt2: Reg) -> u32 { + (op_31_22 << 22) + | (simm7.bits() << 15) + | (machreg_to_gpr(rt2) << 10) + | (machreg_to_gpr(rn) << 5) + | machreg_to_gpr(rt) +} + +fn enc_ldst_simm9(op_31_22: u32, simm9: SImm9, op_11_10: u32, rn: Reg, rd: Reg) -> u32 { + (op_31_22 << 22) + | (simm9.bits() << 12) + | (op_11_10 << 10) + | (machreg_to_gpr(rn) << 5) + | machreg_to_gpr_or_vec(rd) +} + +fn enc_ldst_uimm12(op_31_22: u32, uimm12: UImm12Scaled, rn: Reg, rd: Reg) -> u32 { + (op_31_22 << 22) + | (0b1 << 24) + | (uimm12.bits() << 10) + | (machreg_to_gpr(rn) << 5) + | machreg_to_gpr_or_vec(rd) +} + +fn enc_ldst_reg( + op_31_22: u32, + rn: Reg, + rm: Reg, + s_bit: bool, + extendop: Option, + rd: Reg, +) -> u32 { + let s_bit = if s_bit { 1 } else { 0 }; + let extend_bits = match extendop { + Some(ExtendOp::UXTW) => 0b010, + Some(ExtendOp::SXTW) => 0b110, + Some(ExtendOp::SXTX) => 0b111, + None => 0b011, // LSL + _ => panic!("bad extend mode for ld/st AMode"), + }; + (op_31_22 << 22) + | (1 << 21) + | (machreg_to_gpr(rm) << 16) + | (extend_bits << 13) + | (s_bit << 12) + | (0b10 << 10) + | (machreg_to_gpr(rn) << 5) + | machreg_to_gpr_or_vec(rd) +} + +pub(crate) fn enc_ldst_imm19(op_31_24: u32, imm19: u32, rd: Reg) -> u32 { + (op_31_24 << 24) | (imm19 << 5) | machreg_to_gpr_or_vec(rd) +} + +fn enc_ldst_vec(q: u32, size: u32, rn: Reg, rt: Writable) -> u32 { + debug_assert_eq!(q & 0b1, q); + debug_assert_eq!(size & 0b11, size); + 0b0_0_0011010_10_00000_110_0_00_00000_00000 + | q << 30 + | size << 10 + | machreg_to_gpr(rn) << 5 + | machreg_to_vec(rt.to_reg()) +} + +fn enc_ldst_vec_pair( + opc: u32, + amode: u32, + is_load: bool, + simm7: SImm7Scaled, + rn: Reg, + rt: Reg, + rt2: Reg, +) -> u32 { + debug_assert_eq!(opc & 0b11, opc); + debug_assert_eq!(amode & 0b11, amode); + + 0b00_10110_00_0_0000000_00000_00000_00000 + | opc << 30 + | amode << 23 + | (is_load as u32) << 22 + | simm7.bits() << 15 + | 
machreg_to_vec(rt2) << 10 + | machreg_to_gpr(rn) << 5 + | machreg_to_vec(rt) +} + +fn enc_vec_rrr(top11: u32, rm: Reg, bit15_10: u32, rn: Reg, rd: Writable) -> u32 { + (top11 << 21) + | (machreg_to_vec(rm) << 16) + | (bit15_10 << 10) + | (machreg_to_vec(rn) << 5) + | machreg_to_vec(rd.to_reg()) +} + +fn enc_vec_rrr_long( + q: u32, + u: u32, + size: u32, + bit14: u32, + rm: Reg, + rn: Reg, + rd: Writable, +) -> u32 { + debug_assert_eq!(q & 0b1, q); + debug_assert_eq!(u & 0b1, u); + debug_assert_eq!(size & 0b11, size); + debug_assert_eq!(bit14 & 0b1, bit14); + + 0b0_0_0_01110_00_1_00000_100000_00000_00000 + | q << 30 + | u << 29 + | size << 22 + | bit14 << 14 + | (machreg_to_vec(rm) << 16) + | (machreg_to_vec(rn) << 5) + | machreg_to_vec(rd.to_reg()) +} + +fn enc_bit_rr(size: u32, opcode2: u32, opcode1: u32, rn: Reg, rd: Writable) -> u32 { + (0b01011010110 << 21) + | size << 31 + | opcode2 << 16 + | opcode1 << 10 + | machreg_to_gpr(rn) << 5 + | machreg_to_gpr(rd.to_reg()) +} + +pub(crate) fn enc_br(rn: Reg) -> u32 { + 0b1101011_0000_11111_000000_00000_00000 | (machreg_to_gpr(rn) << 5) +} + +pub(crate) fn enc_adr_inst(opcode: u32, off: i32, rd: Writable) -> u32 { + let off = u32::try_from(off).unwrap(); + let immlo = off & 3; + let immhi = (off >> 2) & ((1 << 19) - 1); + opcode | (immlo << 29) | (immhi << 5) | machreg_to_gpr(rd.to_reg()) +} + +pub(crate) fn enc_adr(off: i32, rd: Writable) -> u32 { + let opcode = 0b00010000 << 24; + enc_adr_inst(opcode, off, rd) +} + +pub(crate) fn enc_adrp(off: i32, rd: Writable) -> u32 { + let opcode = 0b10010000 << 24; + enc_adr_inst(opcode, off, rd) +} + +fn enc_csel(rd: Writable, rn: Reg, rm: Reg, cond: Cond, op: u32, o2: u32) -> u32 { + debug_assert_eq!(op & 0b1, op); + debug_assert_eq!(o2 & 0b1, o2); + 0b100_11010100_00000_0000_00_00000_00000 + | (op << 30) + | (machreg_to_gpr(rm) << 16) + | (cond.bits() << 12) + | (o2 << 10) + | (machreg_to_gpr(rn) << 5) + | machreg_to_gpr(rd.to_reg()) +} + +fn enc_fcsel(rd: Writable, rn: Reg, rm: Reg, cond: Cond, size: ScalarSize) -> u32 { + 0b000_11110_00_1_00000_0000_11_00000_00000 + | (size.ftype() << 22) + | (machreg_to_vec(rm) << 16) + | (machreg_to_vec(rn) << 5) + | machreg_to_vec(rd.to_reg()) + | (cond.bits() << 12) +} + +fn enc_ccmp(size: OperandSize, rn: Reg, rm: Reg, nzcv: NZCV, cond: Cond) -> u32 { + 0b0_1_1_11010010_00000_0000_00_00000_0_0000 + | size.sf_bit() << 31 + | machreg_to_gpr(rm) << 16 + | cond.bits() << 12 + | machreg_to_gpr(rn) << 5 + | nzcv.bits() +} + +fn enc_ccmp_imm(size: OperandSize, rn: Reg, imm: UImm5, nzcv: NZCV, cond: Cond) -> u32 { + 0b0_1_1_11010010_00000_0000_10_00000_0_0000 + | size.sf_bit() << 31 + | imm.bits() << 16 + | cond.bits() << 12 + | machreg_to_gpr(rn) << 5 + | nzcv.bits() +} + +fn enc_bfm(opc: u8, size: OperandSize, rd: Writable, rn: Reg, immr: u8, imms: u8) -> u32 { + match size { + OperandSize::Size64 => { + debug_assert!(immr <= 63); + debug_assert!(imms <= 63); + } + OperandSize::Size32 => { + debug_assert!(immr <= 31); + debug_assert!(imms <= 31); + } + } + debug_assert_eq!(opc & 0b11, opc); + let n_bit = size.sf_bit(); + 0b0_00_100110_0_000000_000000_00000_00000 + | size.sf_bit() << 31 + | u32::from(opc) << 29 + | n_bit << 22 + | u32::from(immr) << 16 + | u32::from(imms) << 10 + | machreg_to_gpr(rn) << 5 + | machreg_to_gpr(rd.to_reg()) +} + +fn enc_vecmov(is_16b: bool, rd: Writable, rn: Reg) -> u32 { + 0b00001110_101_00000_00011_1_00000_00000 + | ((is_16b as u32) << 30) + | machreg_to_vec(rd.to_reg()) + | (machreg_to_vec(rn) << 16) + | (machreg_to_vec(rn) << 5) +} + 
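+// Informal sanity check for the encoder above: `enc_vecmov` with
+// `is_16b = true`, `rd` = v0 and `rn` = v1 should produce 0x4EA11C20, the
+// vector ORR alias `mov v0.16b, v1.16b` (the source register is placed in
+// both the Rn and Rm fields).
+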
+fn enc_fpurr(top22: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
+    (top22 << 10) | (machreg_to_vec(rn) << 5) | machreg_to_vec(rd.to_reg())
+}
+
+fn enc_fpurrr(top22: u32, rd: Writable<Reg>, rn: Reg, rm: Reg) -> u32 {
+    (top22 << 10)
+        | (machreg_to_vec(rm) << 16)
+        | (machreg_to_vec(rn) << 5)
+        | machreg_to_vec(rd.to_reg())
+}
+
+fn enc_fpurrrr(top17: u32, rd: Writable<Reg>, rn: Reg, rm: Reg, ra: Reg) -> u32 {
+    (top17 << 15)
+        | (machreg_to_vec(rm) << 16)
+        | (machreg_to_vec(ra) << 10)
+        | (machreg_to_vec(rn) << 5)
+        | machreg_to_vec(rd.to_reg())
+}
+
+fn enc_fcmp(size: ScalarSize, rn: Reg, rm: Reg) -> u32 {
+    0b000_11110_00_1_00000_00_1000_00000_00000
+        | (size.ftype() << 22)
+        | (machreg_to_vec(rm) << 16)
+        | (machreg_to_vec(rn) << 5)
+}
+
+fn enc_fputoint(top16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
+    (top16 << 16) | (machreg_to_vec(rn) << 5) | machreg_to_gpr(rd.to_reg())
+}
+
+fn enc_inttofpu(top16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
+    (top16 << 16) | (machreg_to_gpr(rn) << 5) | machreg_to_vec(rd.to_reg())
+}
+
+fn enc_fround(top22: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
+    (top22 << 10) | (machreg_to_vec(rn) << 5) | machreg_to_vec(rd.to_reg())
+}
+
+fn enc_vec_rr_misc(qu: u32, size: u32, bits_12_16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
+    debug_assert_eq!(qu & 0b11, qu);
+    debug_assert_eq!(size & 0b11, size);
+    debug_assert_eq!(bits_12_16 & 0b11111, bits_12_16);
+    let bits = 0b0_00_01110_00_10000_00000_10_00000_00000;
+    bits | qu << 29
+        | size << 22
+        | bits_12_16 << 12
+        | machreg_to_vec(rn) << 5
+        | machreg_to_vec(rd.to_reg())
+}
+
+fn enc_vec_rr_pair(bits_12_16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
+    debug_assert_eq!(bits_12_16 & 0b11111, bits_12_16);
+
+    0b010_11110_11_11000_11011_10_00000_00000
+        | bits_12_16 << 12
+        | machreg_to_vec(rn) << 5
+        | machreg_to_vec(rd.to_reg())
+}
+
+fn enc_vec_rr_pair_long(u: u32, enc_size: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
+    debug_assert_eq!(u & 0b1, u);
+    debug_assert_eq!(enc_size & 0b1, enc_size);
+
+    0b0_1_0_01110_00_10000_00_0_10_10_00000_00000
+        | u << 29
+        | enc_size << 22
+        | machreg_to_vec(rn) << 5
+        | machreg_to_vec(rd.to_reg())
+}
+
+fn enc_vec_lanes(q: u32, u: u32, size: u32, opcode: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
+    debug_assert_eq!(q & 0b1, q);
+    debug_assert_eq!(u & 0b1, u);
+    debug_assert_eq!(size & 0b11, size);
+    debug_assert_eq!(opcode & 0b11111, opcode);
+    0b0_0_0_01110_00_11000_0_0000_10_00000_00000
+        | q << 30
+        | u << 29
+        | size << 22
+        | opcode << 12
+        | machreg_to_vec(rn) << 5
+        | machreg_to_vec(rd.to_reg())
+}
+
+fn enc_tbl(is_extension: bool, len: u32, rd: Writable<Reg>, rn: Reg, rm: Reg) -> u32 {
+    debug_assert_eq!(len & 0b11, len);
+    0b0_1_001110_000_00000_0_00_0_00_00000_00000
+        | (machreg_to_vec(rm) << 16)
+        | len << 13
+        | (is_extension as u32) << 12
+        | (machreg_to_vec(rn) << 5)
+        | machreg_to_vec(rd.to_reg())
+}
+
+fn enc_dmb_ish() -> u32 {
+    0xD5033BBF
+}
+
+fn enc_acq_rel(ty: Type, op: AtomicRMWOp, rs: Reg, rt: Writable<Reg>, rn: Reg) -> u32 {
+    assert!(machreg_to_gpr(rt.to_reg()) != 31);
+    let sz = match ty {
+        I64 => 0b11,
+        I32 => 0b10,
+        I16 => 0b01,
+        I8 => 0b00,
+        _ => unreachable!(),
+    };
+    let bit15 = match op {
+        AtomicRMWOp::Swp => 0b1,
+        _ => 0b0,
+    };
+    let op = match op {
+        AtomicRMWOp::Add => 0b000,
+        AtomicRMWOp::Clr => 0b001,
+        AtomicRMWOp::Eor => 0b010,
+        AtomicRMWOp::Set => 0b011,
+        AtomicRMWOp::Smax => 0b100,
+        AtomicRMWOp::Smin => 0b101,
+        AtomicRMWOp::Umax => 0b110,
+        AtomicRMWOp::Umin => 0b111,
+        AtomicRMWOp::Swp => 0b000,
+    };
+    0b00_111_000_111_00000_0_000_00_00000_00000
+        | (sz << 30)
+        | (machreg_to_gpr(rs) << 16)
+        | bit15 << 15
+        | (op << 12)
+        | (machreg_to_gpr(rn) << 5)
+        | machreg_to_gpr(rt.to_reg())
+}
+
+fn enc_ldar(ty: Type, rt: Writable<Reg>, rn: Reg) -> u32 {
+    let sz = match ty {
+        I64 => 0b11,
+        I32 => 0b10,
+        I16 => 0b01,
+        I8 => 0b00,
+        _ => unreachable!(),
+    };
+    0b00_001000_1_1_0_11111_1_11111_00000_00000
+        | (sz << 30)
+        | (machreg_to_gpr(rn) << 5)
+        | machreg_to_gpr(rt.to_reg())
+}
+
+fn enc_stlr(ty: Type, rt: Reg, rn: Reg) -> u32 {
+    let sz = match ty {
+        I64 => 0b11,
+        I32 => 0b10,
+        I16 => 0b01,
+        I8 => 0b00,
+        _ => unreachable!(),
+    };
+    0b00_001000_100_11111_1_11111_00000_00000
+        | (sz << 30)
+        | (machreg_to_gpr(rn) << 5)
+        | machreg_to_gpr(rt)
+}
+
+fn enc_ldaxr(ty: Type, rt: Writable<Reg>, rn: Reg) -> u32 {
+    let sz = match ty {
+        I64 => 0b11,
+        I32 => 0b10,
+        I16 => 0b01,
+        I8 => 0b00,
+        _ => unreachable!(),
+    };
+    0b00_001000_0_1_0_11111_1_11111_00000_00000
+        | (sz << 30)
+        | (machreg_to_gpr(rn) << 5)
+        | machreg_to_gpr(rt.to_reg())
+}
+
+fn enc_stlxr(ty: Type, rs: Writable<Reg>, rt: Reg, rn: Reg) -> u32 {
+    let sz = match ty {
+        I64 => 0b11,
+        I32 => 0b10,
+        I16 => 0b01,
+        I8 => 0b00,
+        _ => unreachable!(),
+    };
+    0b00_001000_000_00000_1_11111_00000_00000
+        | (sz << 30)
+        | (machreg_to_gpr(rs.to_reg()) << 16)
+        | (machreg_to_gpr(rn) << 5)
+        | machreg_to_gpr(rt)
+}
+
+fn enc_cas(size: u32, rs: Writable<Reg>, rt: Reg, rn: Reg) -> u32 {
+    debug_assert_eq!(size & 0b11, size);
+
+    0b00_0010001_1_1_00000_1_11111_00000_00000
+        | size << 30
+        | machreg_to_gpr(rs.to_reg()) << 16
+        | machreg_to_gpr(rn) << 5
+        | machreg_to_gpr(rt)
+}
+
+fn enc_asimd_mod_imm(rd: Writable<Reg>, q_op: u32, cmode: u32, imm: u8) -> u32 {
+    let abc = (imm >> 5) as u32;
+    let defgh = (imm & 0b11111) as u32;
+
+    debug_assert_eq!(cmode & 0b1111, cmode);
+    debug_assert_eq!(q_op & 0b11, q_op);
+
+    0b0_0_0_0111100000_000_0000_01_00000_00000
+        | (q_op << 29)
+        | (abc << 16)
+        | (cmode << 12)
+        | (defgh << 5)
+        | machreg_to_vec(rd.to_reg())
+}
+
+/// State carried between emissions of a sequence of instructions.
+#[derive(Default, Clone, Debug)]
+pub struct EmitState {
+    /// The user stack map for the upcoming instruction, as provided to
+    /// `pre_safepoint()`.
+    user_stack_map: Option<ir::UserStackMap>,
+
+    /// Only used during fuzz-testing. Otherwise, it is a zero-sized struct and
+    /// optimized away at compiletime. See [cranelift_control].
+    ctrl_plane: ControlPlane,
+
+    frame_layout: FrameLayout,
+}
+
+impl MachInstEmitState<Inst> for EmitState {
+    fn new(abi: &Callee<AArch64MachineDeps>, ctrl_plane: ControlPlane) -> Self {
+        EmitState {
+            user_stack_map: None,
+            ctrl_plane,
+            frame_layout: abi.frame_layout().clone(),
+        }
+    }
+
+    fn pre_safepoint(&mut self, user_stack_map: Option<ir::UserStackMap>) {
+        self.user_stack_map = user_stack_map;
+    }
+
+    fn ctrl_plane_mut(&mut self) -> &mut ControlPlane {
+        &mut self.ctrl_plane
+    }
+
+    fn take_ctrl_plane(self) -> ControlPlane {
+        self.ctrl_plane
+    }
+
+    fn frame_layout(&self) -> &FrameLayout {
+        &self.frame_layout
+    }
+}
+
+impl EmitState {
+    fn take_stack_map(&mut self) -> Option<ir::UserStackMap> {
+        self.user_stack_map.take()
+    }
+
+    fn clear_post_insn(&mut self) {
+        self.user_stack_map = None;
+    }
+}
+
+/// Constant state used during function compilation.
+pub struct EmitInfo(settings::Flags);
+
+impl EmitInfo {
+    /// Create a constant state for emission of instructions.
+    pub fn new(flags: settings::Flags) -> Self {
+        Self(flags)
+    }
+}
+
+impl MachInstEmit for Inst {
+    type State = EmitState;
+    type Info = EmitInfo;
+
+    fn emit(&self, sink: &mut MachBuffer<Inst>, emit_info: &Self::Info, state: &mut EmitState) {
+        // N.B.: we *must* not exceed the "worst-case size" used to compute
+        // where to insert islands, except when islands are explicitly triggered
+        // (with an `EmitIsland`). We check this in debug builds. This is `mut`
+        // to allow disabling the check for `JTSequence`, which is always
+        // emitted following an `EmitIsland`.
+        let mut start_off = sink.cur_offset();
+
+        match self {
+            &Inst::AluRRR {
+                alu_op,
+                size,
+                rd,
+                rn,
+                rm,
+            } => {
+                debug_assert!(match alu_op {
+                    ALUOp::SMulH | ALUOp::UMulH => size == OperandSize::Size64,
+                    _ => true,
+                });
+                let top11 = match alu_op {
+                    ALUOp::Add => 0b00001011_000,
+                    ALUOp::Adc => 0b00011010_000,
+                    ALUOp::AdcS => 0b00111010_000,
+                    ALUOp::Sub => 0b01001011_000,
+                    ALUOp::Sbc => 0b01011010_000,
+                    ALUOp::SbcS => 0b01111010_000,
+                    ALUOp::Orr => 0b00101010_000,
+                    ALUOp::And => 0b00001010_000,
+                    ALUOp::AndS => 0b01101010_000,
+                    ALUOp::Eor => 0b01001010_000,
+                    ALUOp::OrrNot => 0b00101010_001,
+                    ALUOp::AndNot => 0b00001010_001,
+                    ALUOp::EorNot => 0b01001010_001,
+                    ALUOp::AddS => 0b00101011_000,
+                    ALUOp::SubS => 0b01101011_000,
+                    ALUOp::SDiv | ALUOp::UDiv => 0b00011010_110,
+                    ALUOp::Extr | ALUOp::Lsr | ALUOp::Asr | ALUOp::Lsl => 0b00011010_110,
+                    ALUOp::SMulH => 0b10011011_010,
+                    ALUOp::UMulH => 0b10011011_110,
+                };
+
+                let top11 = top11 | size.sf_bit() << 10;
+                let bit15_10 = match alu_op {
+                    ALUOp::SDiv => 0b000011,
+                    ALUOp::UDiv => 0b000010,
+                    ALUOp::Extr => 0b001011,
+                    ALUOp::Lsr => 0b001001,
+                    ALUOp::Asr => 0b001010,
+                    ALUOp::Lsl => 0b001000,
+                    ALUOp::SMulH | ALUOp::UMulH => 0b011111,
+                    _ => 0b000000,
+                };
+                debug_assert_ne!(writable_stack_reg(), rd);
+                // The stack pointer is the zero register in this context, so this might be an
+                // indication that something is wrong.
+ debug_assert_ne!(stack_reg(), rn); + debug_assert_ne!(stack_reg(), rm); + sink.put4(enc_arith_rrr(top11, bit15_10, rd, rn, rm)); + } + &Inst::AluRRRR { + alu_op, + size, + rd, + rm, + rn, + ra, + } => { + let (top11, bit15) = match alu_op { + ALUOp3::MAdd => (0b0_00_11011_000, 0), + ALUOp3::MSub => (0b0_00_11011_000, 1), + ALUOp3::UMAddL => { + debug_assert!(size == OperandSize::Size32); + (0b1_00_11011_1_01, 0) + } + ALUOp3::SMAddL => { + debug_assert!(size == OperandSize::Size32); + (0b1_00_11011_0_01, 0) + } + }; + let top11 = top11 | size.sf_bit() << 10; + sink.put4(enc_arith_rrrr(top11, rm, bit15, ra, rn, rd)); + } + &Inst::AluRRImm12 { + alu_op, + size, + rd, + rn, + ref imm12, + } => { + let top8 = match alu_op { + ALUOp::Add => 0b000_10001, + ALUOp::Sub => 0b010_10001, + ALUOp::AddS => 0b001_10001, + ALUOp::SubS => 0b011_10001, + _ => unimplemented!("{:?}", alu_op), + }; + let top8 = top8 | size.sf_bit() << 7; + sink.put4(enc_arith_rr_imm12( + top8, + imm12.shift_bits(), + imm12.imm_bits(), + rn, + rd, + )); + } + &Inst::AluRRImmLogic { + alu_op, + size, + rd, + rn, + ref imml, + } => { + let (top9, inv) = match alu_op { + ALUOp::Orr => (0b001_100100, false), + ALUOp::And => (0b000_100100, false), + ALUOp::AndS => (0b011_100100, false), + ALUOp::Eor => (0b010_100100, false), + ALUOp::OrrNot => (0b001_100100, true), + ALUOp::AndNot => (0b000_100100, true), + ALUOp::EorNot => (0b010_100100, true), + _ => unimplemented!("{:?}", alu_op), + }; + let top9 = top9 | size.sf_bit() << 8; + let imml = if inv { imml.invert() } else { *imml }; + sink.put4(enc_arith_rr_imml(top9, imml.enc_bits(), rn, rd)); + } + + &Inst::AluRRImmShift { + alu_op, + size, + rd, + rn, + ref immshift, + } => { + let amt = immshift.value(); + let (top10, immr, imms) = match alu_op { + ALUOp::Extr => (0b0001001110, machreg_to_gpr(rn), u32::from(amt)), + ALUOp::Lsr => (0b0101001100, u32::from(amt), 0b011111), + ALUOp::Asr => (0b0001001100, u32::from(amt), 0b011111), + ALUOp::Lsl => { + let bits = if size.is64() { 64 } else { 32 }; + ( + 0b0101001100, + u32::from((bits - amt) % bits), + u32::from(bits - 1 - amt), + ) + } + _ => unimplemented!("{:?}", alu_op), + }; + let top10 = top10 | size.sf_bit() << 9 | size.sf_bit(); + let imms = match alu_op { + ALUOp::Lsr | ALUOp::Asr => imms | size.sf_bit() << 5, + _ => imms, + }; + sink.put4( + (top10 << 22) + | (immr << 16) + | (imms << 10) + | (machreg_to_gpr(rn) << 5) + | machreg_to_gpr(rd.to_reg()), + ); + } + + &Inst::AluRRRShift { + alu_op, + size, + rd, + rn, + rm, + ref shiftop, + } => { + let top11: u32 = match alu_op { + ALUOp::Add => 0b000_01011000, + ALUOp::AddS => 0b001_01011000, + ALUOp::Sub => 0b010_01011000, + ALUOp::SubS => 0b011_01011000, + ALUOp::Orr => 0b001_01010000, + ALUOp::And => 0b000_01010000, + ALUOp::AndS => 0b011_01010000, + ALUOp::Eor => 0b010_01010000, + ALUOp::OrrNot => 0b001_01010001, + ALUOp::EorNot => 0b010_01010001, + ALUOp::AndNot => 0b000_01010001, + ALUOp::Extr => 0b000_10011100, + _ => unimplemented!("{:?}", alu_op), + }; + let top11 = top11 | size.sf_bit() << 10; + let top11 = top11 | (u32::from(shiftop.op().bits()) << 1); + let bits_15_10 = u32::from(shiftop.amt().value()); + sink.put4(enc_arith_rrr(top11, bits_15_10, rd, rn, rm)); + } + + &Inst::AluRRRExtend { + alu_op, + size, + rd, + rn, + rm, + extendop, + } => { + let top11: u32 = match alu_op { + ALUOp::Add => 0b00001011001, + ALUOp::Sub => 0b01001011001, + ALUOp::AddS => 0b00101011001, + ALUOp::SubS => 0b01101011001, + _ => unimplemented!("{:?}", alu_op), + }; + let top11 = 
top11 | size.sf_bit() << 10; + let bits_15_10 = u32::from(extendop.bits()) << 3; + sink.put4(enc_arith_rrr(top11, bits_15_10, rd, rn, rm)); + } + + &Inst::BitRR { + op, size, rd, rn, .. + } => { + let (op1, op2) = match op { + BitOp::RBit => (0b00000, 0b000000), + BitOp::Clz => (0b00000, 0b000100), + BitOp::Cls => (0b00000, 0b000101), + BitOp::Rev16 => (0b00000, 0b000001), + BitOp::Rev32 => (0b00000, 0b000010), + BitOp::Rev64 => (0b00000, 0b000011), + }; + sink.put4(enc_bit_rr(size.sf_bit(), op1, op2, rn, rd)) + } + + &Inst::ULoad8 { rd, ref mem, flags } + | &Inst::SLoad8 { rd, ref mem, flags } + | &Inst::ULoad16 { rd, ref mem, flags } + | &Inst::SLoad16 { rd, ref mem, flags } + | &Inst::ULoad32 { rd, ref mem, flags } + | &Inst::SLoad32 { rd, ref mem, flags } + | &Inst::ULoad64 { + rd, ref mem, flags, .. + } + | &Inst::FpuLoad16 { rd, ref mem, flags } + | &Inst::FpuLoad32 { rd, ref mem, flags } + | &Inst::FpuLoad64 { rd, ref mem, flags } + | &Inst::FpuLoad128 { rd, ref mem, flags } => { + let mem = mem.clone(); + let access_ty = self.mem_type().unwrap(); + let (mem_insts, mem) = mem_finalize(Some(sink), &mem, access_ty, state); + + for inst in mem_insts.into_iter() { + inst.emit(sink, emit_info, state); + } + + // ldst encoding helpers take Reg, not Writable. + let rd = rd.to_reg(); + + // This is the base opcode (top 10 bits) for the "unscaled + // immediate" form (Unscaled). Other addressing modes will OR in + // other values for bits 24/25 (bits 1/2 of this constant). + let op = match self { + Inst::ULoad8 { .. } => 0b0011100001, + Inst::SLoad8 { .. } => 0b0011100010, + Inst::ULoad16 { .. } => 0b0111100001, + Inst::SLoad16 { .. } => 0b0111100010, + Inst::ULoad32 { .. } => 0b1011100001, + Inst::SLoad32 { .. } => 0b1011100010, + Inst::ULoad64 { .. } => 0b1111100001, + Inst::FpuLoad16 { .. } => 0b0111110001, + Inst::FpuLoad32 { .. } => 0b1011110001, + Inst::FpuLoad64 { .. } => 0b1111110001, + Inst::FpuLoad128 { .. } => 0b0011110011, + _ => unreachable!(), + }; + + if let Some(trap_code) = flags.trap_code() { + // Register the offset at which the actual load instruction starts. + sink.add_trap(trap_code); + } + + match &mem { + &AMode::Unscaled { rn, simm9 } => { + let reg = rn; + sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd)); + } + &AMode::UnsignedOffset { rn, uimm12 } => { + let reg = rn; + sink.put4(enc_ldst_uimm12(op, uimm12, reg, rd)); + } + &AMode::RegReg { rn, rm } => { + let r1 = rn; + let r2 = rm; + sink.put4(enc_ldst_reg( + op, r1, r2, /* scaled = */ false, /* extendop = */ None, rd, + )); + } + &AMode::RegScaled { rn, rm } | &AMode::RegScaledExtended { rn, rm, .. } => { + let r1 = rn; + let r2 = rm; + let extendop = match &mem { + &AMode::RegScaled { .. } => None, + &AMode::RegScaledExtended { extendop, .. } => Some(extendop), + _ => unreachable!(), + }; + sink.put4(enc_ldst_reg( + op, r1, r2, /* scaled = */ true, extendop, rd, + )); + } + &AMode::RegExtended { rn, rm, extendop } => { + let r1 = rn; + let r2 = rm; + sink.put4(enc_ldst_reg( + op, + r1, + r2, + /* scaled = */ false, + Some(extendop), + rd, + )); + } + &AMode::Label { ref label } => { + let offset = match label { + // cast i32 to u32 (two's-complement) + MemLabel::PCRel(off) => *off as u32, + // Emit a relocation into the `MachBuffer` + // for the label that's being loaded from and + // encode an address of 0 in its place which will + // get filled in by relocation resolution later on. 
+ MemLabel::Mach(label) => { + sink.use_label_at_offset( + sink.cur_offset(), + *label, + LabelUse::Ldr19, + ); + 0 + } + } / 4; + assert!(offset < (1 << 19)); + match self { + &Inst::ULoad32 { .. } => { + sink.put4(enc_ldst_imm19(0b00011000, offset, rd)); + } + &Inst::SLoad32 { .. } => { + sink.put4(enc_ldst_imm19(0b10011000, offset, rd)); + } + &Inst::FpuLoad32 { .. } => { + sink.put4(enc_ldst_imm19(0b00011100, offset, rd)); + } + &Inst::ULoad64 { .. } => { + sink.put4(enc_ldst_imm19(0b01011000, offset, rd)); + } + &Inst::FpuLoad64 { .. } => { + sink.put4(enc_ldst_imm19(0b01011100, offset, rd)); + } + &Inst::FpuLoad128 { .. } => { + sink.put4(enc_ldst_imm19(0b10011100, offset, rd)); + } + _ => panic!("Unsupported size for LDR from constant pool!"), + } + } + &AMode::SPPreIndexed { simm9 } => { + let reg = stack_reg(); + sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg, rd)); + } + &AMode::SPPostIndexed { simm9 } => { + let reg = stack_reg(); + sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg, rd)); + } + // Eliminated by `mem_finalize()` above. + &AMode::SPOffset { .. } + | &AMode::FPOffset { .. } + | &AMode::IncomingArg { .. } + | &AMode::SlotOffset { .. } + | &AMode::Const { .. } + | &AMode::RegOffset { .. } => { + panic!("Should not see {mem:?} here!") + } + } + } + + &Inst::Store8 { rd, ref mem, flags } + | &Inst::Store16 { rd, ref mem, flags } + | &Inst::Store32 { rd, ref mem, flags } + | &Inst::Store64 { rd, ref mem, flags } + | &Inst::FpuStore16 { rd, ref mem, flags } + | &Inst::FpuStore32 { rd, ref mem, flags } + | &Inst::FpuStore64 { rd, ref mem, flags } + | &Inst::FpuStore128 { rd, ref mem, flags } => { + let mem = mem.clone(); + let access_ty = self.mem_type().unwrap(); + let (mem_insts, mem) = mem_finalize(Some(sink), &mem, access_ty, state); + + for inst in mem_insts.into_iter() { + inst.emit(sink, emit_info, state); + } + + let op = match self { + Inst::Store8 { .. } => 0b0011100000, + Inst::Store16 { .. } => 0b0111100000, + Inst::Store32 { .. } => 0b1011100000, + Inst::Store64 { .. } => 0b1111100000, + Inst::FpuStore16 { .. } => 0b0111110000, + Inst::FpuStore32 { .. } => 0b1011110000, + Inst::FpuStore64 { .. } => 0b1111110000, + Inst::FpuStore128 { .. } => 0b0011110010, + _ => unreachable!(), + }; + + if let Some(trap_code) = flags.trap_code() { + // Register the offset at which the actual store instruction starts. + sink.add_trap(trap_code); + } + + match &mem { + &AMode::Unscaled { rn, simm9 } => { + let reg = rn; + sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd)); + } + &AMode::UnsignedOffset { rn, uimm12 } => { + let reg = rn; + sink.put4(enc_ldst_uimm12(op, uimm12, reg, rd)); + } + &AMode::RegReg { rn, rm } => { + let r1 = rn; + let r2 = rm; + sink.put4(enc_ldst_reg( + op, r1, r2, /* scaled = */ false, /* extendop = */ None, rd, + )); + } + &AMode::RegScaled { rn, rm } | &AMode::RegScaledExtended { rn, rm, .. } => { + let r1 = rn; + let r2 = rm; + let extendop = match &mem { + &AMode::RegScaled { .. } => None, + &AMode::RegScaledExtended { extendop, .. } => Some(extendop), + _ => unreachable!(), + }; + sink.put4(enc_ldst_reg( + op, r1, r2, /* scaled = */ true, extendop, rd, + )); + } + &AMode::RegExtended { rn, rm, extendop } => { + let r1 = rn; + let r2 = rm; + sink.put4(enc_ldst_reg( + op, + r1, + r2, + /* scaled = */ false, + Some(extendop), + rd, + )); + } + &AMode::Label { .. 
} => { + panic!("Store to a MemLabel not implemented!"); + } + &AMode::SPPreIndexed { simm9 } => { + let reg = stack_reg(); + sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg, rd)); + } + &AMode::SPPostIndexed { simm9 } => { + let reg = stack_reg(); + sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg, rd)); + } + // Eliminated by `mem_finalize()` above. + &AMode::SPOffset { .. } + | &AMode::FPOffset { .. } + | &AMode::IncomingArg { .. } + | &AMode::SlotOffset { .. } + | &AMode::Const { .. } + | &AMode::RegOffset { .. } => { + panic!("Should not see {mem:?} here!") + } + } + } + + &Inst::StoreP64 { + rt, + rt2, + ref mem, + flags, + } => { + let mem = mem.clone(); + if let Some(trap_code) = flags.trap_code() { + // Register the offset at which the actual store instruction starts. + sink.add_trap(trap_code); + } + match &mem { + &PairAMode::SignedOffset { reg, simm7 } => { + assert_eq!(simm7.scale_ty, I64); + sink.put4(enc_ldst_pair(0b1010100100, simm7, reg, rt, rt2)); + } + &PairAMode::SPPreIndexed { simm7 } => { + assert_eq!(simm7.scale_ty, I64); + let reg = stack_reg(); + sink.put4(enc_ldst_pair(0b1010100110, simm7, reg, rt, rt2)); + } + &PairAMode::SPPostIndexed { simm7 } => { + assert_eq!(simm7.scale_ty, I64); + let reg = stack_reg(); + sink.put4(enc_ldst_pair(0b1010100010, simm7, reg, rt, rt2)); + } + } + } + &Inst::LoadP64 { + rt, + rt2, + ref mem, + flags, + } => { + let rt = rt.to_reg(); + let rt2 = rt2.to_reg(); + let mem = mem.clone(); + if let Some(trap_code) = flags.trap_code() { + // Register the offset at which the actual load instruction starts. + sink.add_trap(trap_code); + } + + match &mem { + &PairAMode::SignedOffset { reg, simm7 } => { + assert_eq!(simm7.scale_ty, I64); + sink.put4(enc_ldst_pair(0b1010100101, simm7, reg, rt, rt2)); + } + &PairAMode::SPPreIndexed { simm7 } => { + assert_eq!(simm7.scale_ty, I64); + let reg = stack_reg(); + sink.put4(enc_ldst_pair(0b1010100111, simm7, reg, rt, rt2)); + } + &PairAMode::SPPostIndexed { simm7 } => { + assert_eq!(simm7.scale_ty, I64); + let reg = stack_reg(); + sink.put4(enc_ldst_pair(0b1010100011, simm7, reg, rt, rt2)); + } + } + } + &Inst::FpuLoadP64 { + rt, + rt2, + ref mem, + flags, + } + | &Inst::FpuLoadP128 { + rt, + rt2, + ref mem, + flags, + } => { + let rt = rt.to_reg(); + let rt2 = rt2.to_reg(); + let mem = mem.clone(); + + if let Some(trap_code) = flags.trap_code() { + // Register the offset at which the actual load instruction starts. + sink.add_trap(trap_code); + } + + let opc = match self { + &Inst::FpuLoadP64 { .. } => 0b01, + &Inst::FpuLoadP128 { .. } => 0b10, + _ => unreachable!(), + }; + + match &mem { + &PairAMode::SignedOffset { reg, simm7 } => { + assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16); + sink.put4(enc_ldst_vec_pair(opc, 0b10, true, simm7, reg, rt, rt2)); + } + &PairAMode::SPPreIndexed { simm7 } => { + assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16); + let reg = stack_reg(); + sink.put4(enc_ldst_vec_pair(opc, 0b11, true, simm7, reg, rt, rt2)); + } + &PairAMode::SPPostIndexed { simm7 } => { + assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16); + let reg = stack_reg(); + sink.put4(enc_ldst_vec_pair(opc, 0b01, true, simm7, reg, rt, rt2)); + } + } + } + &Inst::FpuStoreP64 { + rt, + rt2, + ref mem, + flags, + } + | &Inst::FpuStoreP128 { + rt, + rt2, + ref mem, + flags, + } => { + let mem = mem.clone(); + + if let Some(trap_code) = flags.trap_code() { + // Register the offset at which the actual store instruction starts. 
+ sink.add_trap(trap_code); + } + + let opc = match self { + &Inst::FpuStoreP64 { .. } => 0b01, + &Inst::FpuStoreP128 { .. } => 0b10, + _ => unreachable!(), + }; + + match &mem { + &PairAMode::SignedOffset { reg, simm7 } => { + assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16); + sink.put4(enc_ldst_vec_pair(opc, 0b10, false, simm7, reg, rt, rt2)); + } + &PairAMode::SPPreIndexed { simm7 } => { + assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16); + let reg = stack_reg(); + sink.put4(enc_ldst_vec_pair(opc, 0b11, false, simm7, reg, rt, rt2)); + } + &PairAMode::SPPostIndexed { simm7 } => { + assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16); + let reg = stack_reg(); + sink.put4(enc_ldst_vec_pair(opc, 0b01, false, simm7, reg, rt, rt2)); + } + } + } + &Inst::Mov { size, rd, rm } => { + assert!(rd.to_reg().class() == rm.class()); + assert!(rm.class() == RegClass::Int); + + match size { + OperandSize::Size64 => { + // MOV to SP is interpreted as MOV to XZR instead. And our codegen + // should never MOV to XZR. + assert!(rd.to_reg() != stack_reg()); + + if rm == stack_reg() { + // We can't use ORR here, so use an `add rd, sp, #0` instead. + let imm12 = Imm12::maybe_from_u64(0).unwrap(); + sink.put4(enc_arith_rr_imm12( + 0b100_10001, + imm12.shift_bits(), + imm12.imm_bits(), + rm, + rd, + )); + } else { + // Encoded as ORR rd, rm, zero. + sink.put4(enc_arith_rrr(0b10101010_000, 0b000_000, rd, zero_reg(), rm)); + } + } + OperandSize::Size32 => { + // MOV to SP is interpreted as MOV to XZR instead. And our codegen + // should never MOV to XZR. + assert!(machreg_to_gpr(rd.to_reg()) != 31); + // Encoded as ORR rd, rm, zero. + sink.put4(enc_arith_rrr(0b00101010_000, 0b000_000, rd, zero_reg(), rm)); + } + } + } + &Inst::MovFromPReg { rd, rm } => { + let rm: Reg = rm.into(); + debug_assert!([ + regs::fp_reg(), + regs::stack_reg(), + regs::link_reg(), + regs::pinned_reg() + ] + .contains(&rm)); + assert!(rm.class() == RegClass::Int); + assert!(rd.to_reg().class() == rm.class()); + let size = OperandSize::Size64; + Inst::Mov { size, rd, rm }.emit(sink, emit_info, state); + } + &Inst::MovToPReg { rd, rm } => { + let rd: Writable = Writable::from_reg(rd.into()); + debug_assert!([ + regs::fp_reg(), + regs::stack_reg(), + regs::link_reg(), + regs::pinned_reg() + ] + .contains(&rd.to_reg())); + assert!(rd.to_reg().class() == RegClass::Int); + assert!(rm.class() == rd.to_reg().class()); + let size = OperandSize::Size64; + Inst::Mov { size, rd, rm }.emit(sink, emit_info, state); + } + &Inst::MovWide { op, rd, imm, size } => { + sink.put4(enc_move_wide(op, rd, imm, size)); + } + &Inst::MovK { rd, rn, imm, size } => { + debug_assert_eq!(rn, rd.to_reg()); + sink.put4(enc_movk(rd, imm, size)); + } + &Inst::CSel { rd, rn, rm, cond } => { + sink.put4(enc_csel(rd, rn, rm, cond, 0, 0)); + } + &Inst::CSNeg { rd, rn, rm, cond } => { + sink.put4(enc_csel(rd, rn, rm, cond, 1, 1)); + } + &Inst::CSet { rd, cond } => { + sink.put4(enc_csel(rd, zero_reg(), zero_reg(), cond.invert(), 0, 1)); + } + &Inst::CSetm { rd, cond } => { + sink.put4(enc_csel(rd, zero_reg(), zero_reg(), cond.invert(), 1, 0)); + } + &Inst::CCmp { + size, + rn, + rm, + nzcv, + cond, + } => { + sink.put4(enc_ccmp(size, rn, rm, nzcv, cond)); + } + &Inst::CCmpImm { + size, + rn, + imm, + nzcv, + cond, + } => { + sink.put4(enc_ccmp_imm(size, rn, imm, nzcv, cond)); + } + &Inst::AtomicRMW { + ty, + op, + rs, + rt, + rn, + flags, + } => { + if let Some(trap_code) = flags.trap_code() { + sink.add_trap(trap_code); + } + + sink.put4(enc_acq_rel(ty, 
op, rs, rt, rn)); + } + &Inst::AtomicRMWLoop { ty, op, flags, .. } => { + /* Emit this: + again: + ldaxr{,b,h} x/w27, [x25] + // maybe sign extend + op x28, x27, x26 // op is add,sub,and,orr,eor + stlxr{,b,h} w24, x/w28, [x25] + cbnz x24, again + + Operand conventions: + IN: x25 (addr), x26 (2nd arg for op) + OUT: x27 (old value), x24 (trashed), x28 (trashed) + + It is unfortunate that, per the ARM documentation, x28 cannot be used for + both the store-data and success-flag operands of stlxr. This causes the + instruction's behaviour to be "CONSTRAINED UNPREDICTABLE", so we use x24 + instead for the success-flag. + */ + // TODO: We should not hardcode registers here, a better idea would be to + // pass some scratch registers in the AtomicRMWLoop pseudo-instruction, and use those + let xzr = zero_reg(); + let x24 = xreg(24); + let x25 = xreg(25); + let x26 = xreg(26); + let x27 = xreg(27); + let x28 = xreg(28); + let x24wr = writable_xreg(24); + let x27wr = writable_xreg(27); + let x28wr = writable_xreg(28); + let again_label = sink.get_label(); + + // again: + sink.bind_label(again_label, &mut state.ctrl_plane); + + if let Some(trap_code) = flags.trap_code() { + sink.add_trap(trap_code); + } + + sink.put4(enc_ldaxr(ty, x27wr, x25)); // ldaxr x27, [x25] + let size = OperandSize::from_ty(ty); + let sign_ext = match op { + AtomicRMWLoopOp::Smin | AtomicRMWLoopOp::Smax => match ty { + I16 => Some((ExtendOp::SXTH, 16)), + I8 => Some((ExtendOp::SXTB, 8)), + _ => None, + }, + _ => None, + }; + + // sxt{b|h} the loaded result if necessary. + if sign_ext.is_some() { + let (_, from_bits) = sign_ext.unwrap(); + Inst::Extend { + rd: x27wr, + rn: x27, + signed: true, + from_bits, + to_bits: size.bits(), + } + .emit(sink, emit_info, state); + } + + match op { + AtomicRMWLoopOp::Xchg => {} // do nothing + AtomicRMWLoopOp::Nand => { + // and x28, x27, x26 + // mvn x28, x28 + + Inst::AluRRR { + alu_op: ALUOp::And, + size, + rd: x28wr, + rn: x27, + rm: x26, + } + .emit(sink, emit_info, state); + + Inst::AluRRR { + alu_op: ALUOp::OrrNot, + size, + rd: x28wr, + rn: xzr, + rm: x28, + } + .emit(sink, emit_info, state); + } + AtomicRMWLoopOp::Umin + | AtomicRMWLoopOp::Umax + | AtomicRMWLoopOp::Smin + | AtomicRMWLoopOp::Smax => { + // cmp x27, x26 {?sxt} + // csel.op x28, x27, x26 + + let cond = match op { + AtomicRMWLoopOp::Umin => Cond::Lo, + AtomicRMWLoopOp::Umax => Cond::Hi, + AtomicRMWLoopOp::Smin => Cond::Lt, + AtomicRMWLoopOp::Smax => Cond::Gt, + _ => unreachable!(), + }; + + if sign_ext.is_some() { + let (extendop, _) = sign_ext.unwrap(); + Inst::AluRRRExtend { + alu_op: ALUOp::SubS, + size, + rd: writable_zero_reg(), + rn: x27, + rm: x26, + extendop, + } + .emit(sink, emit_info, state); + } else { + Inst::AluRRR { + alu_op: ALUOp::SubS, + size, + rd: writable_zero_reg(), + rn: x27, + rm: x26, + } + .emit(sink, emit_info, state); + } + + Inst::CSel { + cond, + rd: x28wr, + rn: x27, + rm: x26, + } + .emit(sink, emit_info, state); + } + _ => { + // add/sub/and/orr/eor x28, x27, x26 + let alu_op = match op { + AtomicRMWLoopOp::Add => ALUOp::Add, + AtomicRMWLoopOp::Sub => ALUOp::Sub, + AtomicRMWLoopOp::And => ALUOp::And, + AtomicRMWLoopOp::Orr => ALUOp::Orr, + AtomicRMWLoopOp::Eor => ALUOp::Eor, + AtomicRMWLoopOp::Nand + | AtomicRMWLoopOp::Umin + | AtomicRMWLoopOp::Umax + | AtomicRMWLoopOp::Smin + | AtomicRMWLoopOp::Smax + | AtomicRMWLoopOp::Xchg => unreachable!(), + }; + + Inst::AluRRR { + alu_op, + size, + rd: x28wr, + rn: x27, + rm: x26, + } + .emit(sink, emit_info, state); + } + } + + if let 
Some(trap_code) = flags.trap_code() { + sink.add_trap(trap_code); + } + if op == AtomicRMWLoopOp::Xchg { + sink.put4(enc_stlxr(ty, x24wr, x26, x25)); // stlxr w24, x26, [x25] + } else { + sink.put4(enc_stlxr(ty, x24wr, x28, x25)); // stlxr w24, x28, [x25] + } + + // cbnz w24, again + // Note, we're actually testing x24, and relying on the default zero-high-half + // rule in the assignment that `stlxr` does. + let br_offset = sink.cur_offset(); + sink.put4(enc_conditional_br( + BranchTarget::Label(again_label), + CondBrKind::NotZero(x24, OperandSize::Size64), + )); + sink.use_label_at_offset(br_offset, again_label, LabelUse::Branch19); + } + &Inst::AtomicCAS { + rd, + rs, + rt, + rn, + ty, + flags, + } => { + debug_assert_eq!(rd.to_reg(), rs); + let size = match ty { + I8 => 0b00, + I16 => 0b01, + I32 => 0b10, + I64 => 0b11, + _ => panic!("Unsupported type: {ty}"), + }; + + if let Some(trap_code) = flags.trap_code() { + sink.add_trap(trap_code); + } + + sink.put4(enc_cas(size, rd, rt, rn)); + } + &Inst::AtomicCASLoop { ty, flags, .. } => { + /* Emit this: + again: + ldaxr{,b,h} x/w27, [x25] + cmp x27, x/w26 uxt{b,h} + b.ne out + stlxr{,b,h} w24, x/w28, [x25] + cbnz x24, again + out: + + Operand conventions: + IN: x25 (addr), x26 (expected value), x28 (replacement value) + OUT: x27 (old value), x24 (trashed) + */ + let x24 = xreg(24); + let x25 = xreg(25); + let x26 = xreg(26); + let x27 = xreg(27); + let x28 = xreg(28); + let xzrwr = writable_zero_reg(); + let x24wr = writable_xreg(24); + let x27wr = writable_xreg(27); + let again_label = sink.get_label(); + let out_label = sink.get_label(); + + // again: + sink.bind_label(again_label, &mut state.ctrl_plane); + + if let Some(trap_code) = flags.trap_code() { + sink.add_trap(trap_code); + } + + // ldaxr x27, [x25] + sink.put4(enc_ldaxr(ty, x27wr, x25)); + + // The top 32-bits are zero-extended by the ldaxr so we don't + // have to use UXTW, just the x-form of the register. + let (bit21, extend_op) = match ty { + I8 => (0b1, 0b000000), + I16 => (0b1, 0b001000), + _ => (0b0, 0b000000), + }; + let bits_31_21 = 0b111_01011_000 | bit21; + // cmp x27, x26 (== subs xzr, x27, x26) + sink.put4(enc_arith_rrr(bits_31_21, extend_op, xzrwr, x27, x26)); + + // b.ne out + let br_out_offset = sink.cur_offset(); + sink.put4(enc_conditional_br( + BranchTarget::Label(out_label), + CondBrKind::Cond(Cond::Ne), + )); + sink.use_label_at_offset(br_out_offset, out_label, LabelUse::Branch19); + + if let Some(trap_code) = flags.trap_code() { + sink.add_trap(trap_code); + } + + sink.put4(enc_stlxr(ty, x24wr, x28, x25)); // stlxr w24, x28, [x25] + + // cbnz w24, again. + // Note, we're actually testing x24, and relying on the default zero-high-half + // rule in the assignment that `stlxr` does. 
+ let br_again_offset = sink.cur_offset(); + sink.put4(enc_conditional_br( + BranchTarget::Label(again_label), + CondBrKind::NotZero(x24, OperandSize::Size64), + )); + sink.use_label_at_offset(br_again_offset, again_label, LabelUse::Branch19); + + // out: + sink.bind_label(out_label, &mut state.ctrl_plane); + } + &Inst::LoadAcquire { + access_ty, + rt, + rn, + flags, + } => { + if let Some(trap_code) = flags.trap_code() { + sink.add_trap(trap_code); + } + + sink.put4(enc_ldar(access_ty, rt, rn)); + } + &Inst::StoreRelease { + access_ty, + rt, + rn, + flags, + } => { + if let Some(trap_code) = flags.trap_code() { + sink.add_trap(trap_code); + } + + sink.put4(enc_stlr(access_ty, rt, rn)); + } + &Inst::Fence {} => { + sink.put4(enc_dmb_ish()); // dmb ish + } + &Inst::Csdb {} => { + sink.put4(0xd503229f); + } + &Inst::FpuMove32 { rd, rn } => { + sink.put4(enc_fpurr(0b000_11110_00_1_000000_10000, rd, rn)); + } + &Inst::FpuMove64 { rd, rn } => { + sink.put4(enc_fpurr(0b000_11110_01_1_000000_10000, rd, rn)); + } + &Inst::FpuMove128 { rd, rn } => { + sink.put4(enc_vecmov(/* 16b = */ true, rd, rn)); + } + &Inst::FpuMoveFromVec { rd, rn, idx, size } => { + let (imm5, shift, mask) = match size.lane_size() { + ScalarSize::Size32 => (0b00100, 3, 0b011), + ScalarSize::Size64 => (0b01000, 4, 0b001), + _ => unimplemented!(), + }; + debug_assert_eq!(idx & mask, idx); + let imm5 = imm5 | ((idx as u32) << shift); + sink.put4( + 0b010_11110000_00000_000001_00000_00000 + | (imm5 << 16) + | (machreg_to_vec(rn) << 5) + | machreg_to_vec(rd.to_reg()), + ); + } + &Inst::FpuExtend { rd, rn, size } => { + sink.put4(enc_fpurr( + 0b000_11110_00_1_000000_10000 | (size.ftype() << 12), + rd, + rn, + )); + } + &Inst::FpuRR { + fpu_op, + size, + rd, + rn, + } => { + let top22 = match fpu_op { + FPUOp1::Abs => 0b000_11110_00_1_000001_10000, + FPUOp1::Neg => 0b000_11110_00_1_000010_10000, + FPUOp1::Sqrt => 0b000_11110_00_1_000011_10000, + FPUOp1::Cvt32To64 => { + debug_assert_eq!(size, ScalarSize::Size32); + 0b000_11110_00_1_000101_10000 + } + FPUOp1::Cvt64To32 => { + debug_assert_eq!(size, ScalarSize::Size64); + 0b000_11110_01_1_000100_10000 + } + }; + let top22 = top22 | size.ftype() << 12; + sink.put4(enc_fpurr(top22, rd, rn)); + } + &Inst::FpuRRR { + fpu_op, + size, + rd, + rn, + rm, + } => { + let top22 = match fpu_op { + FPUOp2::Add => 0b000_11110_00_1_00000_001010, + FPUOp2::Sub => 0b000_11110_00_1_00000_001110, + FPUOp2::Mul => 0b000_11110_00_1_00000_000010, + FPUOp2::Div => 0b000_11110_00_1_00000_000110, + FPUOp2::Max => 0b000_11110_00_1_00000_010010, + FPUOp2::Min => 0b000_11110_00_1_00000_010110, + }; + let top22 = top22 | size.ftype() << 12; + sink.put4(enc_fpurrr(top22, rd, rn, rm)); + } + &Inst::FpuRRI { fpu_op, rd, rn } => match fpu_op { + FPUOpRI::UShr32(imm) => { + debug_assert_eq!(32, imm.lane_size_in_bits); + sink.put4( + 0b0_0_1_011110_0000000_00_0_0_0_1_00000_00000 + | imm.enc() << 16 + | machreg_to_vec(rn) << 5 + | machreg_to_vec(rd.to_reg()), + ) + } + FPUOpRI::UShr64(imm) => { + debug_assert_eq!(64, imm.lane_size_in_bits); + sink.put4( + 0b01_1_111110_0000000_00_0_0_0_1_00000_00000 + | imm.enc() << 16 + | machreg_to_vec(rn) << 5 + | machreg_to_vec(rd.to_reg()), + ) + } + }, + &Inst::FpuRRIMod { fpu_op, rd, ri, rn } => { + debug_assert_eq!(rd.to_reg(), ri); + match fpu_op { + FPUOpRIMod::Sli64(imm) => { + debug_assert_eq!(64, imm.lane_size_in_bits); + sink.put4( + 0b01_1_111110_0000000_010101_00000_00000 + | imm.enc() << 16 + | machreg_to_vec(rn) << 5 + | machreg_to_vec(rd.to_reg()), + ) + } + 
FPUOpRIMod::Sli32(imm) => { + debug_assert_eq!(32, imm.lane_size_in_bits); + sink.put4( + 0b0_0_1_011110_0000000_010101_00000_00000 + | imm.enc() << 16 + | machreg_to_vec(rn) << 5 + | machreg_to_vec(rd.to_reg()), + ) + } + } + } + &Inst::FpuRRRR { + fpu_op, + size, + rd, + rn, + rm, + ra, + } => { + let top17 = match fpu_op { + FPUOp3::MAdd => 0b000_11111_00_0_00000_0, + FPUOp3::MSub => 0b000_11111_00_0_00000_1, + FPUOp3::NMAdd => 0b000_11111_00_1_00000_0, + FPUOp3::NMSub => 0b000_11111_00_1_00000_1, + }; + let top17 = top17 | size.ftype() << 7; + sink.put4(enc_fpurrrr(top17, rd, rn, rm, ra)); + } + &Inst::VecMisc { op, rd, rn, size } => { + let (q, enc_size) = size.enc_size(); + let (u, bits_12_16, size) = match op { + VecMisc2::Not => (0b1, 0b00101, 0b00), + VecMisc2::Neg => (0b1, 0b01011, enc_size), + VecMisc2::Abs => (0b0, 0b01011, enc_size), + VecMisc2::Fabs => { + debug_assert!( + size == VectorSize::Size32x2 + || size == VectorSize::Size32x4 + || size == VectorSize::Size64x2 + ); + (0b0, 0b01111, enc_size) + } + VecMisc2::Fneg => { + debug_assert!( + size == VectorSize::Size32x2 + || size == VectorSize::Size32x4 + || size == VectorSize::Size64x2 + ); + (0b1, 0b01111, enc_size) + } + VecMisc2::Fsqrt => { + debug_assert!( + size == VectorSize::Size32x2 + || size == VectorSize::Size32x4 + || size == VectorSize::Size64x2 + ); + (0b1, 0b11111, enc_size) + } + VecMisc2::Rev16 => { + debug_assert_eq!(size, VectorSize::Size8x16); + (0b0, 0b00001, enc_size) + } + VecMisc2::Rev32 => { + debug_assert!(size == VectorSize::Size8x16 || size == VectorSize::Size16x8); + (0b1, 0b00000, enc_size) + } + VecMisc2::Rev64 => { + debug_assert!( + size == VectorSize::Size8x16 + || size == VectorSize::Size16x8 + || size == VectorSize::Size32x4 + ); + (0b0, 0b00000, enc_size) + } + VecMisc2::Fcvtzs => { + debug_assert!( + size == VectorSize::Size32x2 + || size == VectorSize::Size32x4 + || size == VectorSize::Size64x2 + ); + (0b0, 0b11011, enc_size) + } + VecMisc2::Fcvtzu => { + debug_assert!( + size == VectorSize::Size32x2 + || size == VectorSize::Size32x4 + || size == VectorSize::Size64x2 + ); + (0b1, 0b11011, enc_size) + } + VecMisc2::Scvtf => { + debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2); + (0b0, 0b11101, enc_size & 0b1) + } + VecMisc2::Ucvtf => { + debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2); + (0b1, 0b11101, enc_size & 0b1) + } + VecMisc2::Frintn => { + debug_assert!( + size == VectorSize::Size32x2 + || size == VectorSize::Size32x4 + || size == VectorSize::Size64x2 + ); + (0b0, 0b11000, enc_size & 0b01) + } + VecMisc2::Frintz => { + debug_assert!( + size == VectorSize::Size32x2 + || size == VectorSize::Size32x4 + || size == VectorSize::Size64x2 + ); + (0b0, 0b11001, enc_size) + } + VecMisc2::Frintm => { + debug_assert!( + size == VectorSize::Size32x2 + || size == VectorSize::Size32x4 + || size == VectorSize::Size64x2 + ); + (0b0, 0b11001, enc_size & 0b01) + } + VecMisc2::Frintp => { + debug_assert!( + size == VectorSize::Size32x2 + || size == VectorSize::Size32x4 + || size == VectorSize::Size64x2 + ); + (0b0, 0b11000, enc_size) + } + VecMisc2::Cnt => { + debug_assert!(size == VectorSize::Size8x8 || size == VectorSize::Size8x16); + (0b0, 0b00101, enc_size) + } + VecMisc2::Cmeq0 => (0b0, 0b01001, enc_size), + VecMisc2::Cmge0 => (0b1, 0b01000, enc_size), + VecMisc2::Cmgt0 => (0b0, 0b01000, enc_size), + VecMisc2::Cmle0 => (0b1, 0b01001, enc_size), + VecMisc2::Cmlt0 => (0b0, 0b01010, enc_size), + VecMisc2::Fcmeq0 => { + debug_assert!( + size 
== VectorSize::Size32x2 + || size == VectorSize::Size32x4 + || size == VectorSize::Size64x2 + ); + (0b0, 0b01101, enc_size) + } + VecMisc2::Fcmge0 => { + debug_assert!( + size == VectorSize::Size32x2 + || size == VectorSize::Size32x4 + || size == VectorSize::Size64x2 + ); + (0b1, 0b01100, enc_size) + } + VecMisc2::Fcmgt0 => { + debug_assert!( + size == VectorSize::Size32x2 + || size == VectorSize::Size32x4 + || size == VectorSize::Size64x2 + ); + (0b0, 0b01100, enc_size) + } + VecMisc2::Fcmle0 => { + debug_assert!( + size == VectorSize::Size32x2 + || size == VectorSize::Size32x4 + || size == VectorSize::Size64x2 + ); + (0b1, 0b01101, enc_size) + } + VecMisc2::Fcmlt0 => { + debug_assert!( + size == VectorSize::Size32x2 + || size == VectorSize::Size32x4 + || size == VectorSize::Size64x2 + ); + (0b0, 0b01110, enc_size) + } + }; + sink.put4(enc_vec_rr_misc((q << 1) | u, size, bits_12_16, rd, rn)); + } + &Inst::VecLanes { op, rd, rn, size } => { + let (q, size) = match size { + VectorSize::Size8x8 => (0b0, 0b00), + VectorSize::Size8x16 => (0b1, 0b00), + VectorSize::Size16x4 => (0b0, 0b01), + VectorSize::Size16x8 => (0b1, 0b01), + VectorSize::Size32x4 => (0b1, 0b10), + _ => unreachable!(), + }; + let (u, opcode) = match op { + VecLanesOp::Uminv => (0b1, 0b11010), + VecLanesOp::Addv => (0b0, 0b11011), + }; + sink.put4(enc_vec_lanes(q, u, size, opcode, rd, rn)); + } + &Inst::VecShiftImm { + op, + rd, + rn, + size, + imm, + } => { + let (is_shr, mut template) = match op { + VecShiftImmOp::Ushr => (true, 0b_001_011110_0000_000_000001_00000_00000_u32), + VecShiftImmOp::Sshr => (true, 0b_000_011110_0000_000_000001_00000_00000_u32), + VecShiftImmOp::Shl => (false, 0b_000_011110_0000_000_010101_00000_00000_u32), + }; + if size.is_128bits() { + template |= 0b1 << 30; + } + let imm = imm as u32; + // Deal with the somewhat strange encoding scheme for, and limits on, + // the shift amount. + let immh_immb = match (size.lane_size(), is_shr) { + (ScalarSize::Size64, true) if imm >= 1 && imm <= 64 => { + 0b_1000_000_u32 | (64 - imm) + } + (ScalarSize::Size32, true) if imm >= 1 && imm <= 32 => { + 0b_0100_000_u32 | (32 - imm) + } + (ScalarSize::Size16, true) if imm >= 1 && imm <= 16 => { + 0b_0010_000_u32 | (16 - imm) + } + (ScalarSize::Size8, true) if imm >= 1 && imm <= 8 => { + 0b_0001_000_u32 | (8 - imm) + } + (ScalarSize::Size64, false) if imm <= 63 => 0b_1000_000_u32 | imm, + (ScalarSize::Size32, false) if imm <= 31 => 0b_0100_000_u32 | imm, + (ScalarSize::Size16, false) if imm <= 15 => 0b_0010_000_u32 | imm, + (ScalarSize::Size8, false) if imm <= 7 => 0b_0001_000_u32 | imm, + _ => panic!( + "aarch64: Inst::VecShiftImm: emit: invalid op/size/imm {op:?}, {size:?}, {imm:?}" + ), + }; + let rn_enc = machreg_to_vec(rn); + let rd_enc = machreg_to_vec(rd.to_reg()); + sink.put4(template | (immh_immb << 16) | (rn_enc << 5) | rd_enc); + } + &Inst::VecShiftImmMod { + op, + rd, + ri, + rn, + size, + imm, + } => { + debug_assert_eq!(rd.to_reg(), ri); + let (is_shr, mut template) = match op { + VecShiftImmModOp::Sli => (false, 0b_001_011110_0000_000_010101_00000_00000_u32), + }; + if size.is_128bits() { + template |= 0b1 << 30; + } + let imm = imm as u32; + // Deal with the somewhat strange encoding scheme for, and limits on, + // the shift amount. 
+ let immh_immb = match (size.lane_size(), is_shr) { + (ScalarSize::Size64, true) if imm >= 1 && imm <= 64 => { + 0b_1000_000_u32 | (64 - imm) + } + (ScalarSize::Size32, true) if imm >= 1 && imm <= 32 => { + 0b_0100_000_u32 | (32 - imm) + } + (ScalarSize::Size16, true) if imm >= 1 && imm <= 16 => { + 0b_0010_000_u32 | (16 - imm) + } + (ScalarSize::Size8, true) if imm >= 1 && imm <= 8 => { + 0b_0001_000_u32 | (8 - imm) + } + (ScalarSize::Size64, false) if imm <= 63 => 0b_1000_000_u32 | imm, + (ScalarSize::Size32, false) if imm <= 31 => 0b_0100_000_u32 | imm, + (ScalarSize::Size16, false) if imm <= 15 => 0b_0010_000_u32 | imm, + (ScalarSize::Size8, false) if imm <= 7 => 0b_0001_000_u32 | imm, + _ => panic!( + "aarch64: Inst::VecShiftImmMod: emit: invalid op/size/imm {op:?}, {size:?}, {imm:?}" + ), + }; + let rn_enc = machreg_to_vec(rn); + let rd_enc = machreg_to_vec(rd.to_reg()); + sink.put4(template | (immh_immb << 16) | (rn_enc << 5) | rd_enc); + } + &Inst::VecExtract { rd, rn, rm, imm4 } => { + if imm4 < 16 { + let template = 0b_01_101110_000_00000_0_0000_0_00000_00000_u32; + let rm_enc = machreg_to_vec(rm); + let rn_enc = machreg_to_vec(rn); + let rd_enc = machreg_to_vec(rd.to_reg()); + sink.put4( + template | (rm_enc << 16) | ((imm4 as u32) << 11) | (rn_enc << 5) | rd_enc, + ); + } else { + panic!("aarch64: Inst::VecExtract: emit: invalid extract index {imm4}"); + } + } + &Inst::VecTbl { rd, rn, rm } => { + sink.put4(enc_tbl(/* is_extension = */ false, 0b00, rd, rn, rm)); + } + &Inst::VecTblExt { rd, ri, rn, rm } => { + debug_assert_eq!(rd.to_reg(), ri); + sink.put4(enc_tbl(/* is_extension = */ true, 0b00, rd, rn, rm)); + } + &Inst::VecTbl2 { rd, rn, rn2, rm } => { + assert_eq!(machreg_to_vec(rn2), (machreg_to_vec(rn) + 1) % 32); + sink.put4(enc_tbl(/* is_extension = */ false, 0b01, rd, rn, rm)); + } + &Inst::VecTbl2Ext { + rd, + ri, + rn, + rn2, + rm, + } => { + debug_assert_eq!(rd.to_reg(), ri); + assert_eq!(machreg_to_vec(rn2), (machreg_to_vec(rn) + 1) % 32); + sink.put4(enc_tbl(/* is_extension = */ true, 0b01, rd, rn, rm)); + } + &Inst::FpuCmp { size, rn, rm } => { + sink.put4(enc_fcmp(size, rn, rm)); + } + &Inst::FpuToInt { op, rd, rn } => { + let top16 = match op { + // FCVTZS (32/32-bit) + FpuToIntOp::F32ToI32 => 0b000_11110_00_1_11_000, + // FCVTZU (32/32-bit) + FpuToIntOp::F32ToU32 => 0b000_11110_00_1_11_001, + // FCVTZS (32/64-bit) + FpuToIntOp::F32ToI64 => 0b100_11110_00_1_11_000, + // FCVTZU (32/64-bit) + FpuToIntOp::F32ToU64 => 0b100_11110_00_1_11_001, + // FCVTZS (64/32-bit) + FpuToIntOp::F64ToI32 => 0b000_11110_01_1_11_000, + // FCVTZU (64/32-bit) + FpuToIntOp::F64ToU32 => 0b000_11110_01_1_11_001, + // FCVTZS (64/64-bit) + FpuToIntOp::F64ToI64 => 0b100_11110_01_1_11_000, + // FCVTZU (64/64-bit) + FpuToIntOp::F64ToU64 => 0b100_11110_01_1_11_001, + }; + sink.put4(enc_fputoint(top16, rd, rn)); + } + &Inst::IntToFpu { op, rd, rn } => { + let top16 = match op { + // SCVTF (32/32-bit) + IntToFpuOp::I32ToF32 => 0b000_11110_00_1_00_010, + // UCVTF (32/32-bit) + IntToFpuOp::U32ToF32 => 0b000_11110_00_1_00_011, + // SCVTF (64/32-bit) + IntToFpuOp::I64ToF32 => 0b100_11110_00_1_00_010, + // UCVTF (64/32-bit) + IntToFpuOp::U64ToF32 => 0b100_11110_00_1_00_011, + // SCVTF (32/64-bit) + IntToFpuOp::I32ToF64 => 0b000_11110_01_1_00_010, + // UCVTF (32/64-bit) + IntToFpuOp::U32ToF64 => 0b000_11110_01_1_00_011, + // SCVTF (64/64-bit) + IntToFpuOp::I64ToF64 => 0b100_11110_01_1_00_010, + // UCVTF (64/64-bit) + IntToFpuOp::U64ToF64 => 0b100_11110_01_1_00_011, + }; + 
sink.put4(enc_inttofpu(top16, rd, rn)); + } + &Inst::FpuCSel16 { rd, rn, rm, cond } => { + sink.put4(enc_fcsel(rd, rn, rm, cond, ScalarSize::Size16)); + } + &Inst::FpuCSel32 { rd, rn, rm, cond } => { + sink.put4(enc_fcsel(rd, rn, rm, cond, ScalarSize::Size32)); + } + &Inst::FpuCSel64 { rd, rn, rm, cond } => { + sink.put4(enc_fcsel(rd, rn, rm, cond, ScalarSize::Size64)); + } + &Inst::FpuRound { op, rd, rn } => { + let top22 = match op { + FpuRoundMode::Minus32 => 0b000_11110_00_1_001_010_10000, + FpuRoundMode::Minus64 => 0b000_11110_01_1_001_010_10000, + FpuRoundMode::Plus32 => 0b000_11110_00_1_001_001_10000, + FpuRoundMode::Plus64 => 0b000_11110_01_1_001_001_10000, + FpuRoundMode::Zero32 => 0b000_11110_00_1_001_011_10000, + FpuRoundMode::Zero64 => 0b000_11110_01_1_001_011_10000, + FpuRoundMode::Nearest32 => 0b000_11110_00_1_001_000_10000, + FpuRoundMode::Nearest64 => 0b000_11110_01_1_001_000_10000, + }; + sink.put4(enc_fround(top22, rd, rn)); + } + &Inst::MovToFpu { rd, rn, size } => { + let template = match size { + ScalarSize::Size16 => 0b000_11110_11_1_00_111_000000_00000_00000, + ScalarSize::Size32 => 0b000_11110_00_1_00_111_000000_00000_00000, + ScalarSize::Size64 => 0b100_11110_01_1_00_111_000000_00000_00000, + _ => unreachable!(), + }; + sink.put4(template | (machreg_to_gpr(rn) << 5) | machreg_to_vec(rd.to_reg())); + } + &Inst::FpuMoveFPImm { rd, imm, size } => { + sink.put4( + 0b000_11110_00_1_00_000_000100_00000_00000 + | size.ftype() << 22 + | ((imm.enc_bits() as u32) << 13) + | machreg_to_vec(rd.to_reg()), + ); + } + &Inst::MovToVec { + rd, + ri, + rn, + idx, + size, + } => { + debug_assert_eq!(rd.to_reg(), ri); + let (imm5, shift) = match size.lane_size() { + ScalarSize::Size8 => (0b00001, 1), + ScalarSize::Size16 => (0b00010, 2), + ScalarSize::Size32 => (0b00100, 3), + ScalarSize::Size64 => (0b01000, 4), + _ => unreachable!(), + }; + debug_assert_eq!(idx & (0b11111 >> shift), idx); + let imm5 = imm5 | ((idx as u32) << shift); + sink.put4( + 0b010_01110000_00000_0_0011_1_00000_00000 + | (imm5 << 16) + | (machreg_to_gpr(rn) << 5) + | machreg_to_vec(rd.to_reg()), + ); + } + &Inst::MovFromVec { rd, rn, idx, size } => { + let (q, imm5, shift, mask) = match size { + ScalarSize::Size8 => (0b0, 0b00001, 1, 0b1111), + ScalarSize::Size16 => (0b0, 0b00010, 2, 0b0111), + ScalarSize::Size32 => (0b0, 0b00100, 3, 0b0011), + ScalarSize::Size64 => (0b1, 0b01000, 4, 0b0001), + _ => panic!("Unexpected scalar FP operand size: {size:?}"), + }; + debug_assert_eq!(idx & mask, idx); + let imm5 = imm5 | ((idx as u32) << shift); + sink.put4( + 0b000_01110000_00000_0_0111_1_00000_00000 + | (q << 30) + | (imm5 << 16) + | (machreg_to_vec(rn) << 5) + | machreg_to_gpr(rd.to_reg()), + ); + } + &Inst::MovFromVecSigned { + rd, + rn, + idx, + size, + scalar_size, + } => { + let (imm5, shift, half) = match size { + VectorSize::Size8x8 => (0b00001, 1, true), + VectorSize::Size8x16 => (0b00001, 1, false), + VectorSize::Size16x4 => (0b00010, 2, true), + VectorSize::Size16x8 => (0b00010, 2, false), + VectorSize::Size32x2 => { + debug_assert_ne!(scalar_size, OperandSize::Size32); + (0b00100, 3, true) + } + VectorSize::Size32x4 => { + debug_assert_ne!(scalar_size, OperandSize::Size32); + (0b00100, 3, false) + } + _ => panic!("Unexpected vector operand size"), + }; + debug_assert_eq!(idx & (0b11111 >> (half as u32 + shift)), idx); + let imm5 = imm5 | ((idx as u32) << shift); + sink.put4( + 0b000_01110000_00000_0_0101_1_00000_00000 + | (scalar_size.is64() as u32) << 30 + | (imm5 << 16) + | (machreg_to_vec(rn) << 5) + | 
machreg_to_gpr(rd.to_reg()), + ); + } + &Inst::VecDup { rd, rn, size } => { + let q = size.is_128bits() as u32; + let imm5 = match size.lane_size() { + ScalarSize::Size8 => 0b00001, + ScalarSize::Size16 => 0b00010, + ScalarSize::Size32 => 0b00100, + ScalarSize::Size64 => 0b01000, + _ => unreachable!(), + }; + sink.put4( + 0b0_0_0_01110000_00000_000011_00000_00000 + | (q << 30) + | (imm5 << 16) + | (machreg_to_gpr(rn) << 5) + | machreg_to_vec(rd.to_reg()), + ); + } + &Inst::VecDupFromFpu { rd, rn, size, lane } => { + let q = size.is_128bits() as u32; + let imm5 = match size.lane_size() { + ScalarSize::Size8 => { + assert!(lane < 16); + 0b00001 | (u32::from(lane) << 1) + } + ScalarSize::Size16 => { + assert!(lane < 8); + 0b00010 | (u32::from(lane) << 2) + } + ScalarSize::Size32 => { + assert!(lane < 4); + 0b00100 | (u32::from(lane) << 3) + } + ScalarSize::Size64 => { + assert!(lane < 2); + 0b01000 | (u32::from(lane) << 4) + } + _ => unimplemented!(), + }; + sink.put4( + 0b000_01110000_00000_000001_00000_00000 + | (q << 30) + | (imm5 << 16) + | (machreg_to_vec(rn) << 5) + | machreg_to_vec(rd.to_reg()), + ); + } + &Inst::VecDupFPImm { rd, imm, size } => { + let imm = imm.enc_bits(); + let op = match size.lane_size() { + ScalarSize::Size32 => 0, + ScalarSize::Size64 => 1, + _ => unimplemented!(), + }; + let q_op = op | ((size.is_128bits() as u32) << 1); + + sink.put4(enc_asimd_mod_imm(rd, q_op, 0b1111, imm)); + } + &Inst::VecDupImm { + rd, + imm, + invert, + size, + } => { + let (imm, shift, shift_ones) = imm.value(); + let (op, cmode) = match size.lane_size() { + ScalarSize::Size8 => { + assert!(!invert); + assert_eq!(shift, 0); + + (0, 0b1110) + } + ScalarSize::Size16 => { + let s = shift & 8; + + assert!(!shift_ones); + assert_eq!(s, shift); + + (invert as u32, 0b1000 | (s >> 2)) + } + ScalarSize::Size32 => { + if shift_ones { + assert!(shift == 8 || shift == 16); + + (invert as u32, 0b1100 | (shift >> 4)) + } else { + let s = shift & 24; + + assert_eq!(s, shift); + + (invert as u32, 0b0000 | (s >> 2)) + } + } + ScalarSize::Size64 => { + assert!(!invert); + assert_eq!(shift, 0); + + (1, 0b1110) + } + _ => unreachable!(), + }; + let q_op = op | ((size.is_128bits() as u32) << 1); + + sink.put4(enc_asimd_mod_imm(rd, q_op, cmode, imm)); + } + &Inst::VecExtend { + t, + rd, + rn, + high_half, + lane_size, + } => { + let immh = match lane_size { + ScalarSize::Size16 => 0b001, + ScalarSize::Size32 => 0b010, + ScalarSize::Size64 => 0b100, + _ => panic!("Unexpected VecExtend to lane size of {lane_size:?}"), + }; + let u = match t { + VecExtendOp::Sxtl => 0b0, + VecExtendOp::Uxtl => 0b1, + }; + sink.put4( + 0b000_011110_0000_000_101001_00000_00000 + | ((high_half as u32) << 30) + | (u << 29) + | (immh << 19) + | (machreg_to_vec(rn) << 5) + | machreg_to_vec(rd.to_reg()), + ); + } + &Inst::VecRRLong { + op, + rd, + rn, + high_half, + } => { + let (u, size, bits_12_16) = match op { + VecRRLongOp::Fcvtl16 => (0b0, 0b00, 0b10111), + VecRRLongOp::Fcvtl32 => (0b0, 0b01, 0b10111), + VecRRLongOp::Shll8 => (0b1, 0b00, 0b10011), + VecRRLongOp::Shll16 => (0b1, 0b01, 0b10011), + VecRRLongOp::Shll32 => (0b1, 0b10, 0b10011), + }; + + sink.put4(enc_vec_rr_misc( + ((high_half as u32) << 1) | u, + size, + bits_12_16, + rd, + rn, + )); + } + &Inst::VecRRNarrowLow { + op, + rd, + rn, + lane_size, + } + | &Inst::VecRRNarrowHigh { + op, + rd, + rn, + lane_size, + .. + } => { + let high_half = match self { + &Inst::VecRRNarrowLow { .. } => false, + &Inst::VecRRNarrowHigh { .. 
} => true, + _ => unreachable!(), + }; + + let size = match lane_size { + ScalarSize::Size8 => 0b00, + ScalarSize::Size16 => 0b01, + ScalarSize::Size32 => 0b10, + _ => panic!("unsupported size: {lane_size:?}"), + }; + + // Floats use a single bit, to encode either half or single. + let size = match op { + VecRRNarrowOp::Fcvtn => size >> 1, + _ => size, + }; + + let (u, bits_12_16) = match op { + VecRRNarrowOp::Xtn => (0b0, 0b10010), + VecRRNarrowOp::Sqxtn => (0b0, 0b10100), + VecRRNarrowOp::Sqxtun => (0b1, 0b10010), + VecRRNarrowOp::Uqxtn => (0b1, 0b10100), + VecRRNarrowOp::Fcvtn => (0b0, 0b10110), + }; + + sink.put4(enc_vec_rr_misc( + ((high_half as u32) << 1) | u, + size, + bits_12_16, + rd, + rn, + )); + } + &Inst::VecMovElement { + rd, + ri, + rn, + dest_idx, + src_idx, + size, + } => { + debug_assert_eq!(rd.to_reg(), ri); + let (imm5, shift) = match size.lane_size() { + ScalarSize::Size8 => (0b00001, 1), + ScalarSize::Size16 => (0b00010, 2), + ScalarSize::Size32 => (0b00100, 3), + ScalarSize::Size64 => (0b01000, 4), + _ => unreachable!(), + }; + let mask = 0b11111 >> shift; + debug_assert_eq!(dest_idx & mask, dest_idx); + debug_assert_eq!(src_idx & mask, src_idx); + let imm4 = (src_idx as u32) << (shift - 1); + let imm5 = imm5 | ((dest_idx as u32) << shift); + sink.put4( + 0b011_01110000_00000_0_0000_1_00000_00000 + | (imm5 << 16) + | (imm4 << 11) + | (machreg_to_vec(rn) << 5) + | machreg_to_vec(rd.to_reg()), + ); + } + &Inst::VecRRPair { op, rd, rn } => { + let bits_12_16 = match op { + VecPairOp::Addp => 0b11011, + }; + + sink.put4(enc_vec_rr_pair(bits_12_16, rd, rn)); + } + &Inst::VecRRRLong { + rd, + rn, + rm, + alu_op, + high_half, + } => { + let (u, size, bit14) = match alu_op { + VecRRRLongOp::Smull8 => (0b0, 0b00, 0b1), + VecRRRLongOp::Smull16 => (0b0, 0b01, 0b1), + VecRRRLongOp::Smull32 => (0b0, 0b10, 0b1), + VecRRRLongOp::Umull8 => (0b1, 0b00, 0b1), + VecRRRLongOp::Umull16 => (0b1, 0b01, 0b1), + VecRRRLongOp::Umull32 => (0b1, 0b10, 0b1), + }; + sink.put4(enc_vec_rrr_long( + high_half as u32, + u, + size, + bit14, + rm, + rn, + rd, + )); + } + &Inst::VecRRRLongMod { + rd, + ri, + rn, + rm, + alu_op, + high_half, + } => { + debug_assert_eq!(rd.to_reg(), ri); + let (u, size, bit14) = match alu_op { + VecRRRLongModOp::Umlal8 => (0b1, 0b00, 0b0), + VecRRRLongModOp::Umlal16 => (0b1, 0b01, 0b0), + VecRRRLongModOp::Umlal32 => (0b1, 0b10, 0b0), + }; + sink.put4(enc_vec_rrr_long( + high_half as u32, + u, + size, + bit14, + rm, + rn, + rd, + )); + } + &Inst::VecRRPairLong { op, rd, rn } => { + let (u, size) = match op { + VecRRPairLongOp::Saddlp8 => (0b0, 0b0), + VecRRPairLongOp::Uaddlp8 => (0b1, 0b0), + VecRRPairLongOp::Saddlp16 => (0b0, 0b1), + VecRRPairLongOp::Uaddlp16 => (0b1, 0b1), + }; + + sink.put4(enc_vec_rr_pair_long(u, size, rd, rn)); + } + &Inst::VecRRR { + rd, + rn, + rm, + alu_op, + size, + } => { + let (q, enc_size) = size.enc_size(); + let is_float = match alu_op { + VecALUOp::Fcmeq + | VecALUOp::Fcmgt + | VecALUOp::Fcmge + | VecALUOp::Fadd + | VecALUOp::Fsub + | VecALUOp::Fdiv + | VecALUOp::Fmax + | VecALUOp::Fmin + | VecALUOp::Fmul => true, + _ => false, + }; + + let (top11, bit15_10) = match alu_op { + VecALUOp::Sqadd => (0b000_01110_00_1 | enc_size << 1, 0b000011), + VecALUOp::Sqsub => (0b000_01110_00_1 | enc_size << 1, 0b001011), + VecALUOp::Uqadd => (0b001_01110_00_1 | enc_size << 1, 0b000011), + VecALUOp::Uqsub => (0b001_01110_00_1 | enc_size << 1, 0b001011), + VecALUOp::Cmeq => (0b001_01110_00_1 | enc_size << 1, 0b100011), + VecALUOp::Cmge => (0b000_01110_00_1 | 
enc_size << 1, 0b001111), + VecALUOp::Cmgt => (0b000_01110_00_1 | enc_size << 1, 0b001101), + VecALUOp::Cmhi => (0b001_01110_00_1 | enc_size << 1, 0b001101), + VecALUOp::Cmhs => (0b001_01110_00_1 | enc_size << 1, 0b001111), + VecALUOp::Fcmeq => (0b000_01110_00_1, 0b111001), + VecALUOp::Fcmgt => (0b001_01110_10_1, 0b111001), + VecALUOp::Fcmge => (0b001_01110_00_1, 0b111001), + // The following logical instructions operate on bytes, so are not encoded differently + // for the different vector types. + VecALUOp::And => (0b000_01110_00_1, 0b000111), + VecALUOp::Bic => (0b000_01110_01_1, 0b000111), + VecALUOp::Orr => (0b000_01110_10_1, 0b000111), + VecALUOp::Eor => (0b001_01110_00_1, 0b000111), + VecALUOp::Umaxp => { + debug_assert_ne!(size, VectorSize::Size64x2); + + (0b001_01110_00_1 | enc_size << 1, 0b101001) + } + VecALUOp::Add => (0b000_01110_00_1 | enc_size << 1, 0b100001), + VecALUOp::Sub => (0b001_01110_00_1 | enc_size << 1, 0b100001), + VecALUOp::Mul => { + debug_assert_ne!(size, VectorSize::Size64x2); + (0b000_01110_00_1 | enc_size << 1, 0b100111) + } + VecALUOp::Sshl => (0b000_01110_00_1 | enc_size << 1, 0b010001), + VecALUOp::Ushl => (0b001_01110_00_1 | enc_size << 1, 0b010001), + VecALUOp::Umin => { + debug_assert_ne!(size, VectorSize::Size64x2); + + (0b001_01110_00_1 | enc_size << 1, 0b011011) + } + VecALUOp::Smin => { + debug_assert_ne!(size, VectorSize::Size64x2); + + (0b000_01110_00_1 | enc_size << 1, 0b011011) + } + VecALUOp::Umax => { + debug_assert_ne!(size, VectorSize::Size64x2); + + (0b001_01110_00_1 | enc_size << 1, 0b011001) + } + VecALUOp::Smax => { + debug_assert_ne!(size, VectorSize::Size64x2); + + (0b000_01110_00_1 | enc_size << 1, 0b011001) + } + VecALUOp::Urhadd => { + debug_assert_ne!(size, VectorSize::Size64x2); + + (0b001_01110_00_1 | enc_size << 1, 0b000101) + } + VecALUOp::Fadd => (0b000_01110_00_1, 0b110101), + VecALUOp::Fsub => (0b000_01110_10_1, 0b110101), + VecALUOp::Fdiv => (0b001_01110_00_1, 0b111111), + VecALUOp::Fmax => (0b000_01110_00_1, 0b111101), + VecALUOp::Fmin => (0b000_01110_10_1, 0b111101), + VecALUOp::Fmul => (0b001_01110_00_1, 0b110111), + VecALUOp::Addp => (0b000_01110_00_1 | enc_size << 1, 0b101111), + VecALUOp::Zip1 => (0b01001110_00_0 | enc_size << 1, 0b001110), + VecALUOp::Zip2 => (0b01001110_00_0 | enc_size << 1, 0b011110), + VecALUOp::Sqrdmulh => { + debug_assert!( + size.lane_size() == ScalarSize::Size16 + || size.lane_size() == ScalarSize::Size32 + ); + + (0b001_01110_00_1 | enc_size << 1, 0b101101) + } + VecALUOp::Uzp1 => (0b01001110_00_0 | enc_size << 1, 0b000110), + VecALUOp::Uzp2 => (0b01001110_00_0 | enc_size << 1, 0b010110), + VecALUOp::Trn1 => (0b01001110_00_0 | enc_size << 1, 0b001010), + VecALUOp::Trn2 => (0b01001110_00_0 | enc_size << 1, 0b011010), + }; + let top11 = if is_float { + top11 | size.enc_float_size() << 1 + } else { + top11 + }; + sink.put4(enc_vec_rrr(top11 | q << 9, rm, bit15_10, rn, rd)); + } + &Inst::VecRRRMod { + rd, + ri, + rn, + rm, + alu_op, + size, + } => { + debug_assert_eq!(rd.to_reg(), ri); + let (q, _enc_size) = size.enc_size(); + + let (top11, bit15_10) = match alu_op { + VecALUModOp::Bsl => (0b001_01110_01_1, 0b000111), + VecALUModOp::Fmla => { + (0b000_01110_00_1 | (size.enc_float_size() << 1), 0b110011) + } + VecALUModOp::Fmls => { + (0b000_01110_10_1 | (size.enc_float_size() << 1), 0b110011) + } + }; + sink.put4(enc_vec_rrr(top11 | q << 9, rm, bit15_10, rn, rd)); + } + &Inst::VecFmlaElem { + rd, + ri, + rn, + rm, + alu_op, + size, + idx, + } => { + debug_assert_eq!(rd.to_reg(), ri); + let idx = 
u32::from(idx); + + let (q, _size) = size.enc_size(); + let o2 = match alu_op { + VecALUModOp::Fmla => 0b0, + VecALUModOp::Fmls => 0b1, + _ => unreachable!(), + }; + + let (h, l) = match size { + VectorSize::Size32x4 => { + assert!(idx < 4); + (idx >> 1, idx & 1) + } + VectorSize::Size64x2 => { + assert!(idx < 2); + (idx, 0) + } + _ => unreachable!(), + }; + + let top11 = 0b000_011111_00 | (q << 9) | (size.enc_float_size() << 1) | l; + let bit15_10 = 0b000100 | (o2 << 4) | (h << 1); + sink.put4(enc_vec_rrr(top11, rm, bit15_10, rn, rd)); + } + &Inst::VecLoadReplicate { + rd, + rn, + size, + flags, + } => { + let (q, size) = size.enc_size(); + + if let Some(trap_code) = flags.trap_code() { + // Register the offset at which the actual load instruction starts. + sink.add_trap(trap_code); + } + + sink.put4(enc_ldst_vec(q, size, rn, rd)); + } + &Inst::VecCSel { rd, rn, rm, cond } => { + /* Emit this: + b.cond else + mov rd, rm + b out + else: + mov rd, rn + out: + + Note, we could do better in the cases where rd == rn or rd == rm. + */ + let else_label = sink.get_label(); + let out_label = sink.get_label(); + + // b.cond else + let br_else_offset = sink.cur_offset(); + sink.put4(enc_conditional_br( + BranchTarget::Label(else_label), + CondBrKind::Cond(cond), + )); + sink.use_label_at_offset(br_else_offset, else_label, LabelUse::Branch19); + + // mov rd, rm + sink.put4(enc_vecmov(/* 16b = */ true, rd, rm)); + + // b out + let b_out_offset = sink.cur_offset(); + sink.use_label_at_offset(b_out_offset, out_label, LabelUse::Branch26); + sink.add_uncond_branch(b_out_offset, b_out_offset + 4, out_label); + sink.put4(enc_jump26(0b000101, 0 /* will be fixed up later */)); + + // else: + sink.bind_label(else_label, &mut state.ctrl_plane); + + // mov rd, rn + sink.put4(enc_vecmov(/* 16b = */ true, rd, rn)); + + // out: + sink.bind_label(out_label, &mut state.ctrl_plane); + } + &Inst::MovToNZCV { rn } => { + sink.put4(0xd51b4200 | machreg_to_gpr(rn)); + } + &Inst::MovFromNZCV { rd } => { + sink.put4(0xd53b4200 | machreg_to_gpr(rd.to_reg())); + } + &Inst::Extend { + rd, + rn, + signed: false, + from_bits: 1, + to_bits, + } => { + assert!(to_bits <= 64); + // Reduce zero-extend-from-1-bit to: + // - and rd, rn, #1 + // Note: This is special cased as UBFX may take more cycles + // than AND on smaller cores. + let imml = ImmLogic::maybe_from_u64(1, I32).unwrap(); + Inst::AluRRImmLogic { + alu_op: ALUOp::And, + size: OperandSize::Size32, + rd, + rn, + imml, + } + .emit(sink, emit_info, state); + } + &Inst::Extend { + rd, + rn, + signed: false, + from_bits: 32, + to_bits: 64, + } => { + let mov = Inst::Mov { + size: OperandSize::Size32, + rd, + rm: rn, + }; + mov.emit(sink, emit_info, state); + } + &Inst::Extend { + rd, + rn, + signed, + from_bits, + to_bits, + } => { + let (opc, size) = if signed { + (0b00, OperandSize::from_bits(to_bits)) + } else { + (0b10, OperandSize::Size32) + }; + sink.put4(enc_bfm(opc, size, rd, rn, 0, from_bits - 1)); + } + &Inst::Jump { ref dest } => { + let off = sink.cur_offset(); + // Indicate that the jump uses a label, if so, so that a fixup can occur later. + if let Some(l) = dest.as_label() { + sink.use_label_at_offset(off, l, LabelUse::Branch26); + sink.add_uncond_branch(off, off + 4, l); + } + // Emit the jump itself. + sink.put4(enc_jump26(0b000101, dest.as_offset26_or_zero())); + } + &Inst::Args { .. } | &Inst::Rets { .. } => { + // Nothing: this is a pseudoinstruction that serves + // only to constrain registers at a certain point. 
+ } + &Inst::Ret {} => { + sink.put4(0xd65f03c0); + } + &Inst::AuthenticatedRet { key, is_hint } => { + let (op2, is_hint) = match key { + APIKey::AZ => (0b100, true), + APIKey::ASP => (0b101, is_hint), + APIKey::BZ => (0b110, true), + APIKey::BSP => (0b111, is_hint), + }; + + if is_hint { + sink.put4(key.enc_auti_hint()); + Inst::Ret {}.emit(sink, emit_info, state); + } else { + sink.put4(0xd65f0bff | (op2 << 9)); // reta{key} + } + } + &Inst::Call { ref info } => { + let user_stack_map = state.take_stack_map(); + sink.add_reloc(Reloc::Arm64Call, &info.dest, 0); + sink.put4(enc_jump26(0b100101, 0)); + if let Some(s) = user_stack_map { + let offset = sink.cur_offset(); + sink.push_user_stack_map(state, offset, s); + } + sink.add_call_site(); + + if info.callee_pop_size > 0 { + let callee_pop_size = + i32::try_from(info.callee_pop_size).expect("callee popped more than 2GB"); + for inst in AArch64MachineDeps::gen_sp_reg_adjust(-callee_pop_size) { + inst.emit(sink, emit_info, state); + } + } + } + &Inst::CallInd { ref info } => { + let user_stack_map = state.take_stack_map(); + sink.put4( + 0b1101011_0001_11111_000000_00000_00000 | (machreg_to_gpr(info.dest) << 5), + ); + if let Some(s) = user_stack_map { + let offset = sink.cur_offset(); + sink.push_user_stack_map(state, offset, s); + } + sink.add_call_site(); + + if info.callee_pop_size > 0 { + let callee_pop_size = + i32::try_from(info.callee_pop_size).expect("callee popped more than 2GB"); + for inst in AArch64MachineDeps::gen_sp_reg_adjust(-callee_pop_size) { + inst.emit(sink, emit_info, state); + } + } + } + &Inst::ReturnCall { ref info } => { + emit_return_call_common_sequence(sink, emit_info, state, info); + + // Note: this is not `Inst::Jump { .. }.emit(..)` because we + // have different metadata in this case: we don't have a label + // for the target, but rather a function relocation. + sink.add_reloc(Reloc::Arm64Call, &info.dest, 0); + sink.put4(enc_jump26(0b000101, 0)); + sink.add_call_site(); + + // `emit_return_call_common_sequence` emits an island if + // necessary, so we can safely disable the worst-case-size check + // in this case. + start_off = sink.cur_offset(); + } + &Inst::ReturnCallInd { ref info } => { + emit_return_call_common_sequence(sink, emit_info, state, info); + + Inst::IndirectBr { + rn: info.dest, + targets: vec![], + } + .emit(sink, emit_info, state); + sink.add_call_site(); + + // `emit_return_call_common_sequence` emits an island if + // necessary, so we can safely disable the worst-case-size check + // in this case. + start_off = sink.cur_offset(); + } + &Inst::CondBr { + taken, + not_taken, + kind, + } => { + // Conditional part first. + let cond_off = sink.cur_offset(); + if let Some(l) = taken.as_label() { + sink.use_label_at_offset(cond_off, l, LabelUse::Branch19); + let inverted = enc_conditional_br(taken, kind.invert()).to_le_bytes(); + sink.add_cond_branch(cond_off, cond_off + 4, l, &inverted[..]); + } + sink.put4(enc_conditional_br(taken, kind)); + + // Unconditional part next. 
+ let uncond_off = sink.cur_offset(); + if let Some(l) = not_taken.as_label() { + sink.use_label_at_offset(uncond_off, l, LabelUse::Branch26); + sink.add_uncond_branch(uncond_off, uncond_off + 4, l); + } + sink.put4(enc_jump26(0b000101, not_taken.as_offset26_or_zero())); + } + &Inst::TestBitAndBranch { + taken, + not_taken, + kind, + rn, + bit, + } => { + // Emit the conditional branch first + let cond_off = sink.cur_offset(); + if let Some(l) = taken.as_label() { + sink.use_label_at_offset(cond_off, l, LabelUse::Branch14); + let inverted = + enc_test_bit_and_branch(kind.complement(), taken, rn, bit).to_le_bytes(); + sink.add_cond_branch(cond_off, cond_off + 4, l, &inverted[..]); + } + sink.put4(enc_test_bit_and_branch(kind, taken, rn, bit)); + + // Unconditional part next. + let uncond_off = sink.cur_offset(); + if let Some(l) = not_taken.as_label() { + sink.use_label_at_offset(uncond_off, l, LabelUse::Branch26); + sink.add_uncond_branch(uncond_off, uncond_off + 4, l); + } + sink.put4(enc_jump26(0b000101, not_taken.as_offset26_or_zero())); + } + &Inst::TrapIf { kind, trap_code } => { + let label = sink.defer_trap(trap_code); + // condbr KIND, LABEL + let off = sink.cur_offset(); + sink.put4(enc_conditional_br(BranchTarget::Label(label), kind)); + sink.use_label_at_offset(off, label, LabelUse::Branch19); + } + &Inst::IndirectBr { rn, .. } => { + sink.put4(enc_br(rn)); + } + &Inst::Nop0 => {} + &Inst::Nop4 => { + sink.put4(0xd503201f); + } + &Inst::Brk => { + sink.put4(0xd4200000); + } + &Inst::Udf { trap_code } => { + sink.add_trap(trap_code); + sink.put_data(Inst::TRAP_OPCODE); + } + &Inst::Adr { rd, off } => { + assert!(off > -(1 << 20)); + assert!(off < (1 << 20)); + sink.put4(enc_adr(off, rd)); + } + &Inst::Adrp { rd, off } => { + assert!(off > -(1 << 20)); + assert!(off < (1 << 20)); + sink.put4(enc_adrp(off, rd)); + } + &Inst::Word4 { data } => { + sink.put4(data); + } + &Inst::Word8 { data } => { + sink.put8(data); + } + &Inst::JTSequence { + ridx, + rtmp1, + rtmp2, + default, + ref targets, + .. + } => { + // This sequence is *one* instruction in the vcode, and is expanded only here at + // emission time, because we cannot allow the regalloc to insert spills/reloads in + // the middle; we depend on hardcoded PC-rel addressing below. + + // Branch to default when condition code from prior comparison indicates. + let br = + enc_conditional_br(BranchTarget::Label(default), CondBrKind::Cond(Cond::Hs)); + + // No need to inform the sink's branch folding logic about this branch, because it + // will not be merged with any other branch, flipped, or elided (it is not preceded + // or succeeded by any other branch). Just emit it with the label use. + let default_br_offset = sink.cur_offset(); + sink.use_label_at_offset(default_br_offset, default, LabelUse::Branch19); + sink.put4(br); + + // Overwrite the index with a zero when the above + // branch misspeculates (Spectre mitigation). Save the + // resulting index in rtmp2. + let inst = Inst::CSel { + rd: rtmp2, + cond: Cond::Hs, + rn: zero_reg(), + rm: ridx, + }; + inst.emit(sink, emit_info, state); + // Prevent any data value speculation. 
+ Inst::Csdb.emit(sink, emit_info, state); + + // Load address of jump table + let inst = Inst::Adr { rd: rtmp1, off: 16 }; + inst.emit(sink, emit_info, state); + // Load value out of jump table + let inst = Inst::SLoad32 { + rd: rtmp2, + mem: AMode::reg_plus_reg_scaled_extended( + rtmp1.to_reg(), + rtmp2.to_reg(), + ExtendOp::UXTW, + ), + flags: MemFlags::trusted(), + }; + inst.emit(sink, emit_info, state); + // Add base of jump table to jump-table-sourced block offset + let inst = Inst::AluRRR { + alu_op: ALUOp::Add, + size: OperandSize::Size64, + rd: rtmp1, + rn: rtmp1.to_reg(), + rm: rtmp2.to_reg(), + }; + inst.emit(sink, emit_info, state); + // Branch to computed address. (`targets` here is only used for successor queries + // and is not needed for emission.) + let inst = Inst::IndirectBr { + rn: rtmp1.to_reg(), + targets: vec![], + }; + inst.emit(sink, emit_info, state); + // Emit jump table (table of 32-bit offsets). + let jt_off = sink.cur_offset(); + for &target in targets.iter() { + let word_off = sink.cur_offset(); + // off_into_table is an addend here embedded in the label to be later patched + // at the end of codegen. The offset is initially relative to this jump table + // entry; with the extra addend, it'll be relative to the jump table's start, + // after patching. + let off_into_table = word_off - jt_off; + sink.use_label_at_offset(word_off, target, LabelUse::PCRel32); + sink.put4(off_into_table); + } + + // Lowering produces an EmitIsland before using a JTSequence, so we can safely + // disable the worst-case-size check in this case. + start_off = sink.cur_offset(); + } + &Inst::LoadExtName { + rd, + ref name, + offset, + } => { + if emit_info.0.is_pic() { + // See this CE Example for the variations of this with and without BTI & PAUTH + // https://godbolt.org/z/ncqjbbvvn + // + // Emit the following code: + // adrp rd, :got:X + // ldr rd, [rd, :got_lo12:X] + + // adrp rd, symbol + sink.add_reloc(Reloc::Aarch64AdrGotPage21, &**name, 0); + let inst = Inst::Adrp { rd, off: 0 }; + inst.emit(sink, emit_info, state); + + // ldr rd, [rd, :got_lo12:X] + sink.add_reloc(Reloc::Aarch64Ld64GotLo12Nc, &**name, 0); + let inst = Inst::ULoad64 { + rd, + mem: AMode::reg(rd.to_reg()), + flags: MemFlags::trusted(), + }; + inst.emit(sink, emit_info, state); + } else { + // With absolute offsets we set up a load from a preallocated space, and then jump + // over it. 
+ // + // Emit the following code: + // ldr rd, #8 + // b #0x10 + // <8 byte space> + + let inst = Inst::ULoad64 { + rd, + mem: AMode::Label { + label: MemLabel::PCRel(8), + }, + flags: MemFlags::trusted(), + }; + inst.emit(sink, emit_info, state); + let inst = Inst::Jump { + dest: BranchTarget::ResolvedOffset(12), + }; + inst.emit(sink, emit_info, state); + sink.add_reloc(Reloc::Abs8, &**name, offset); + sink.put8(0); + } + } + &Inst::LoadAddr { rd, ref mem } => { + let mem = mem.clone(); + let (mem_insts, mem) = mem_finalize(Some(sink), &mem, I8, state); + for inst in mem_insts.into_iter() { + inst.emit(sink, emit_info, state); + } + + let (reg, index_reg, offset) = match mem { + AMode::RegExtended { rn, rm, extendop } => { + let r = rn; + (r, Some((rm, extendop)), 0) + } + AMode::Unscaled { rn, simm9 } => { + let r = rn; + (r, None, simm9.value()) + } + AMode::UnsignedOffset { rn, uimm12 } => { + let r = rn; + (r, None, uimm12.value() as i32) + } + _ => panic!("Unsupported case for LoadAddr: {mem:?}"), + }; + let abs_offset = if offset < 0 { + -offset as u64 + } else { + offset as u64 + }; + let alu_op = if offset < 0 { ALUOp::Sub } else { ALUOp::Add }; + + if let Some((idx, extendop)) = index_reg { + let add = Inst::AluRRRExtend { + alu_op: ALUOp::Add, + size: OperandSize::Size64, + rd, + rn: reg, + rm: idx, + extendop, + }; + + add.emit(sink, emit_info, state); + } else if offset == 0 { + if reg != rd.to_reg() { + let mov = Inst::Mov { + size: OperandSize::Size64, + rd, + rm: reg, + }; + + mov.emit(sink, emit_info, state); + } + } else if let Some(imm12) = Imm12::maybe_from_u64(abs_offset) { + let add = Inst::AluRRImm12 { + alu_op, + size: OperandSize::Size64, + rd, + rn: reg, + imm12, + }; + add.emit(sink, emit_info, state); + } else { + // Use `tmp2` here: `reg` may be `spilltmp` if the `AMode` on this instruction + // was initially an `SPOffset`. Assert that `tmp2` is truly free to use. Note + // that no other instructions will be inserted here (we're emitting directly), + // and a live range of `tmp2` should not span this instruction, so this use + // should otherwise be correct. 
+ debug_assert!(rd.to_reg() != tmp2_reg()); + debug_assert!(reg != tmp2_reg()); + let tmp = writable_tmp2_reg(); + for insn in Inst::load_constant(tmp, abs_offset, &mut |_| tmp).into_iter() { + insn.emit(sink, emit_info, state); + } + let add = Inst::AluRRR { + alu_op, + size: OperandSize::Size64, + rd, + rn: reg, + rm: tmp.to_reg(), + }; + add.emit(sink, emit_info, state); + } + } + &Inst::Paci { key } => { + let (crm, op2) = match key { + APIKey::AZ => (0b0011, 0b000), + APIKey::ASP => (0b0011, 0b001), + APIKey::BZ => (0b0011, 0b010), + APIKey::BSP => (0b0011, 0b011), + }; + + sink.put4(0xd503211f | (crm << 8) | (op2 << 5)); + } + &Inst::Xpaclri => sink.put4(0xd50320ff), + &Inst::Bti { targets } => { + let targets = match targets { + BranchTargetType::None => 0b00, + BranchTargetType::C => 0b01, + BranchTargetType::J => 0b10, + BranchTargetType::JC => 0b11, + }; + + sink.put4(0xd503241f | targets << 6); + } + &Inst::EmitIsland { needed_space } => { + if sink.island_needed(needed_space + 4) { + let jump_around_label = sink.get_label(); + let jmp = Inst::Jump { + dest: BranchTarget::Label(jump_around_label), + }; + jmp.emit(sink, emit_info, state); + sink.emit_island(needed_space + 4, &mut state.ctrl_plane); + sink.bind_label(jump_around_label, &mut state.ctrl_plane); + } + } + + &Inst::ElfTlsGetAddr { + ref symbol, + rd, + tmp, + } => { + assert_eq!(xreg(0), rd.to_reg()); + + // See the original proposal for TLSDESC. + // http://www.fsfla.org/~lxoliva/writeups/TLS/paper-lk2006.pdf + // + // Implement the TLSDESC instruction sequence: + // adrp x0, :tlsdesc:tlsvar + // ldr tmp, [x0, :tlsdesc_lo12:tlsvar] + // add x0, x0, :tlsdesc_lo12:tlsvar + // blr tmp + // mrs tmp, tpidr_el0 + // add x0, x0, tmp + // + // This is the instruction sequence that GCC emits for ELF GD TLS Relocations in aarch64 + // See: https://gcc.godbolt.org/z/e4j7MdErh + + // adrp x0, :tlsdesc:tlsvar + sink.add_reloc(Reloc::Aarch64TlsDescAdrPage21, &**symbol, 0); + Inst::Adrp { rd, off: 0 }.emit(sink, emit_info, state); + + // ldr tmp, [x0, :tlsdesc_lo12:tlsvar] + sink.add_reloc(Reloc::Aarch64TlsDescLd64Lo12, &**symbol, 0); + Inst::ULoad64 { + rd: tmp, + mem: AMode::reg(rd.to_reg()), + flags: MemFlags::trusted(), + } + .emit(sink, emit_info, state); + + // add x0, x0, :tlsdesc_lo12:tlsvar + sink.add_reloc(Reloc::Aarch64TlsDescAddLo12, &**symbol, 0); + Inst::AluRRImm12 { + alu_op: ALUOp::Add, + size: OperandSize::Size64, + rd, + rn: rd.to_reg(), + imm12: Imm12::maybe_from_u64(0).unwrap(), + } + .emit(sink, emit_info, state); + + // blr tmp + sink.add_reloc(Reloc::Aarch64TlsDescCall, &**symbol, 0); + Inst::CallInd { + info: crate::isa::Box::new(CallInfo::empty(tmp.to_reg(), CallConv::SystemV)), + } + .emit(sink, emit_info, state); + + // mrs tmp, tpidr_el0 + sink.put4(0xd53bd040 | machreg_to_gpr(tmp.to_reg())); + + // add x0, x0, tmp + Inst::AluRRR { + alu_op: ALUOp::Add, + size: OperandSize::Size64, + rd, + rn: rd.to_reg(), + rm: tmp.to_reg(), + } + .emit(sink, emit_info, state); + } + + &Inst::MachOTlsGetAddr { ref symbol, rd } => { + // Each thread local variable gets a descriptor, where the first xword of the descriptor is a pointer + // to a function that takes the descriptor address in x0, and after the function returns x0 + // contains the address for the thread local variable + // + // what we want to emit is basically: + // + // adrp x0,