From 31d06a4ddd0c2c63382b54756c44f053a2936fa1 Mon Sep 17 00:00:00 2001 From: Johannes Wilm Date: Mon, 10 Feb 2025 22:53:52 +0100 Subject: [PATCH 01/67] update pandoc repo --- .github/workflows/build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 1da2561..11edfa1 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -41,8 +41,8 @@ jobs: - name: checkout uses: actions/checkout@v4 with: - repository: haskell-wasm/pandoc - ref: wasm + repository: johanneswilm/pandoc + ref: main path: pandoc - name: gen-plan-json From 75fbbed15b0afbb2ce219a929bcb5a0d0094fd63 Mon Sep 17 00:00:00 2001 From: Johannes Wilm Date: Mon, 10 Feb 2025 22:56:00 +0100 Subject: [PATCH 02/67] Update links --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index fec2f1a..92034a1 100644 --- a/README.md +++ b/README.md @@ -6,14 +6,14 @@ The latest version of `pandoc` CLI compiled as a standalone `wasm32-wasi` module that can be run by engines like `wasmtime` as well as browsers. -## [Live demo](https://tweag.github.io/pandoc-wasm) +## [Live demo](https://johanneswilm.github.io/pandoc-wasm) Stdin on the left, stdout on the right, command line arguments at the bottom. No convert button, output is produced dynamically as input changes. You're also more than welcome to fetch the -[`pandoc.wasm`](https://tweag.github.io/pandoc-wasm/pandoc.wasm) +[`pandoc.wasm`](https://johanneswilm.github.io/pandoc-wasm/pandoc.wasm) module and make your own customized app. `pandoc.wasm` is fully `wasm32-wasi` compliant and doesn't make use of any JSFFI feature in the ghc wasm backend. From c92ad774f0b0cfed47d21ef5b9886ffee1d6b741 Mon Sep 17 00:00:00 2001 From: Johannes Wilm Date: Mon, 10 Feb 2025 22:57:07 +0100 Subject: [PATCH 03/67] Update README.md --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 92034a1..3ae0f21 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,7 @@ # `pandoc-wasm` +*Fork for private experimentation.* + [![Chat on Matrix](https://matrix.to/img/matrix-badge.svg)](https://matrix.to/#/#haskell-wasm:matrix.terrorjack.com) The latest version of `pandoc` CLI compiled as a standalone From 3259e244d813b5adda3ffe442b2cfe71b2b804cb Mon Sep 17 00:00:00 2001 From: Johannes Wilm Date: Tue, 11 Feb 2025 01:37:44 +0100 Subject: [PATCH 04/67] use patch for build --- .github/workflows/build.yml | 21 +++- patch/pandoc.patch | 229 ++++++++++++++++++++++++++++++++++++ 2 files changed, 247 insertions(+), 3 deletions(-) create mode 100644 patch/pandoc.patch diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 11edfa1..9ae76ee 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -35,16 +35,22 @@ jobs: ~/.ghc-wasm/add_to_github_path.sh popd - - name: checkout + - name: Checkout Pandoc-wasm uses: actions/checkout@v4 - - name: checkout + - name: Checkout Pandoc uses: actions/checkout@v4 with: - repository: johanneswilm/pandoc + repository: jgm/pandoc ref: main path: pandoc + - name: Patch Pandoc sources + run: | + cd pandoc + patch -p1 < ../patch/pandoc.patch + cd .. + - name: gen-plan-json run: | pushd pandoc @@ -77,6 +83,15 @@ jobs: wasmtime run --dir $PWD::/ -- dist/pandoc.wasm pandoc/README.md -o pandoc/README.rst head --lines=20 pandoc/README.rst + - name: Upload to release + uses: svenstaro/upload-release-action@v2 + with: + repo_token: ${{ secrets.GITHUB_TOKEN }} + file: dist/pandoc.wasm + asset_name: pandoc.wasm + tag: ${{ github.ref }} + overwrite: true + - name: upload-pages-artifact uses: actions/upload-pages-artifact@v3 with: diff --git a/patch/pandoc.patch b/patch/pandoc.patch new file mode 100644 index 0000000..27a5fde --- /dev/null +++ b/patch/pandoc.patch @@ -0,0 +1,229 @@ +diff --git a/cabal.project b/cabal.project +index 4ca6da52e630..b3a4ffcbb87b 100644 +--- a/cabal.project ++++ b/cabal.project +@@ -2,10 +2,147 @@ packages: . + pandoc-lua-engine + pandoc-server + pandoc-cli +-tests: True +-flags: +embed_data_files ++tests: False ++ + constraints: skylighting-format-blaze-html >= 0.1.1.3, + skylighting-format-context >= 0.1.0.2, + -- for now (commercialhaskell/stackage#7545): + data-default-class <= 0.2, data-default <= 0.8 + ++allow-newer: all:zlib ++ ++package aeson ++ flags: -ordered-keymap ++ ++package crypton ++ ghc-options: -optc-DARGON2_NO_THREADS ++ ++package digest ++ flags: -pkg-config ++ ++package pandoc ++ flags: +embed_data_files ++ ++package pandoc-cli ++ flags: -lua -server ++ ++allow-newer: ++ all:Cabal, ++ all:Cabal-syntax, ++ all:array, ++ all:base, ++ all:binary, ++ all:bytestring, ++ all:containers, ++ all:deepseq, ++ all:directory, ++ all:exceptions, ++ all:filepath, ++ all:ghc, ++ all:ghc-bignum, ++ all:ghc-boot, ++ all:ghc-boot-th, ++ all:ghc-compact, ++ all:ghc-experimental, ++ all:ghc-heap, ++ all:ghc-internal, ++ all:ghc-platform, ++ all:ghc-prim, ++ all:ghc-toolchain, ++ all:ghci, ++ all:haskeline, ++ all:hpc, ++ all:integer-gmp, ++ all:mtl, ++ all:os-string, ++ all:parsec, ++ all:pretty, ++ all:process, ++ all:rts, ++ all:semaphore-compat, ++ all:stm, ++ all:system-cxx-std-lib, ++ all:template-haskell, ++ all:text, ++ all:time, ++ all:transformers, ++ all:unix, ++ all:xhtml ++ ++constraints: ++ Cabal installed, ++ Cabal-syntax installed, ++ array installed, ++ base installed, ++ binary installed, ++ bytestring installed, ++ containers installed, ++ deepseq installed, ++ directory installed, ++ exceptions installed, ++ filepath installed, ++ ghc installed, ++ ghc-bignum installed, ++ ghc-boot installed, ++ ghc-boot-th installed, ++ ghc-compact installed, ++ ghc-experimental installed, ++ ghc-heap installed, ++ ghc-internal installed, ++ ghc-platform installed, ++ ghc-prim installed, ++ ghc-toolchain installed, ++ ghci installed, ++ haskeline installed, ++ hpc installed, ++ integer-gmp installed, ++ mtl installed, ++ os-string installed, ++ parsec installed, ++ pretty installed, ++ process installed, ++ rts installed, ++ semaphore-compat installed, ++ stm installed, ++ system-cxx-std-lib installed, ++ template-haskell installed, ++ text installed, ++ time installed, ++ transformers installed, ++ unix installed, ++ xhtml installed ++ ++-- https://github.com/haskell/network/pull/598 ++source-repository-package ++ type: git ++ location: https://github.com/haskell-wasm/network.git ++ tag: ab92e48e9fdf3abe214f85fdbe5301c1280e14e9 ++ ++source-repository-package ++ type: git ++ location: https://github.com/haskell-wasm/foundation.git ++ tag: 8e6dd48527fb429c1922083a5030ef88e3d58dd3 ++ subdir: basement ++ ++source-repository-package ++ type: git ++ location: https://github.com/haskell-wasm/hs-memory.git ++ tag: a198a76c584dc2cfdcde6b431968de92a5fed65e ++ ++source-repository-package ++ type: git ++ location: https://github.com/haskell-wasm/xml.git ++ tag: bc793dc9bc29c92245d3482a54d326abd3ae1403 ++ subdir: xml-conduit ++ ++-- https://github.com/haskellari/splitmix/pull/73 ++source-repository-package ++ type: git ++ location: https://github.com/amesgen/splitmix ++ tag: 5f5b766d97dc735ac228215d240a3bb90bc2ff75 ++ ++source-repository-package ++ type: git ++ location: https://github.com/amesgen/cborg ++ tag: c3b5c696f62d04c0d87f55250bfc0016ab94d800 ++ subdir: cborg +diff --git a/pandoc-cli/pandoc-cli.cabal b/pandoc-cli/pandoc-cli.cabal +index 5b904b9906bd..66d92a1875f3 100644 +--- a/pandoc-cli/pandoc-cli.cabal ++++ b/pandoc-cli/pandoc-cli.cabal +@@ -61,7 +61,7 @@ common common-options + + common common-executable + import: common-options +- ghc-options: -rtsopts -with-rtsopts=-A8m -threaded ++ ghc-options: -rtsopts -with-rtsopts=-H64m + + executable pandoc + import: common-executable +@@ -74,6 +74,10 @@ executable pandoc + text + other-modules: PandocCLI.Lua + , PandocCLI.Server ++ ++ if arch(wasm32) ++ ghc-options: -optl-Wl,--export=__wasm_call_ctors,--export=hs_init_with_rtsopts,--export=malloc,--export=wasm_main ++ + if flag(nightly) + cpp-options: -DNIGHTLY + build-depends: template-haskell, +diff --git a/pandoc-cli/src/pandoc.hs b/pandoc-cli/src/pandoc.hs +index 019d0adedb15..520a858c89a2 100644 +--- a/pandoc-cli/src/pandoc.hs ++++ b/pandoc-cli/src/pandoc.hs +@@ -1,5 +1,7 @@ + {-# LANGUAGE CPP #-} ++{-# LANGUAGE ScopedTypeVariables #-} + {-# LANGUAGE TemplateHaskell #-} ++ + {- | + Module : Main + Copyright : Copyright (C) 2006-2024 John MacFarlane +@@ -34,6 +36,13 @@ import qualified Language.Haskell.TH as TH + import Data.Time + #endif + ++#if defined(wasm32_HOST_ARCH) ++import Control.Exception ++import Foreign ++import Foreign.C ++import System.IO ++#endif ++ + #ifdef NIGHTLY + versionSuffix :: String + versionSuffix = "-nightly-" ++ +@@ -44,6 +53,24 @@ versionSuffix :: String + versionSuffix = "" + #endif + ++#if defined(wasm32_HOST_ARCH) ++ ++foreign export ccall "wasm_main" wasm_main :: Ptr CChar -> Int -> IO () ++ ++wasm_main :: Ptr CChar -> Int -> IO () ++wasm_main raw_args_ptr raw_args_len = ++ catch act (\(err :: SomeException) -> hPrint stderr err) ++ where ++ act = do ++ args <- words <$> peekCStringLen (raw_args_ptr, raw_args_len) ++ free raw_args_ptr ++ engine <- getEngine ++ res <- parseOptionsFromArgs options defaultOpts "pandoc.wasm" $ args <> ["/in", "-o", "/out"] ++ case res of ++ Left e -> handleOptInfo engine e ++ Right opts -> convertWithOpts engine opts ++#endif ++ + main :: IO () + main = E.handle (handleError . Left) $ do + prg <- getProgName From 9d68a297a71606f1806d3b170999f0e9b7486dba Mon Sep 17 00:00:00 2001 From: Johannes Wilm Date: Tue, 11 Feb 2025 01:40:17 +0100 Subject: [PATCH 05/67] clarify readme wording --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 3ae0f21..55b6e4f 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,7 @@ need at least 9.10 since it's the earliest major version with (my non-official) backports for ghc wasm backend's Template Haskell & ghci support. -It's built using my +It's build-method is based on this [fork](https://github.com/haskell-wasm/pandoc/tree/wasm) which is based on latest `pandoc` release and patches dependencies, cabal config as well as some module code to make things compilable to wasm: From 9f59d2320e43504623bd2c106d5c7682945d69d3 Mon Sep 17 00:00:00 2001 From: Johannes Wilm Date: Tue, 11 Feb 2025 08:56:27 +0100 Subject: [PATCH 06/67] Upload releases to ga run when not tagged --- .github/workflows/build.yml | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 9ae76ee..ad96e3d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,12 +1,9 @@ name: build -on: - merge_group: - pull_request: - push: - branches: - - master - workflow_dispatch: +on: [push, pull_request] + +permissions: + contents: write jobs: build: @@ -51,6 +48,13 @@ jobs: patch -p1 < ../patch/pandoc.patch cd .. + - name: Extract version from pandoc.cabal + id: extract-version + run: | + VERSION=$(grep '^version:' pandoc/pandoc.cabal | awk '{print $2}') + echo "Extracted version: $VERSION" + echo "VERSION=$VERSION" >> $GITHUB_ENV + - name: gen-plan-json run: | pushd pandoc @@ -83,12 +87,23 @@ jobs: wasmtime run --dir $PWD::/ -- dist/pandoc.wasm pandoc/README.md -o pandoc/README.rst head --lines=20 pandoc/README.rst + - name: Zip dist folder + run: | + zip -r pandoc-wasm-$VERSION.zip dist/ + + - name: Upload zipped file as artifact + uses: actions/upload-artifact@v3 + with: + name: pandoc-wasm-v$VERSION.zip + path: pandoc-wasm-v$VERSION.zip + - name: Upload to release + if: github.event_name == 'push' && contains(github.ref, 'refs/tags/') uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: dist/pandoc.wasm - asset_name: pandoc.wasm + file: pandoc-wasm-$VERSION.zip + asset_name: pandoc-wasm-$VERSION.zip tag: ${{ github.ref }} overwrite: true From 15afec9523c67d511d7a4bfad6b57153e4e9149a Mon Sep 17 00:00:00 2001 From: Johannes Wilm Date: Tue, 11 Feb 2025 09:00:45 +0100 Subject: [PATCH 07/67] actions/upload-artifact@v4 --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index ad96e3d..4dd6a94 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -92,7 +92,7 @@ jobs: zip -r pandoc-wasm-$VERSION.zip dist/ - name: Upload zipped file as artifact - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: pandoc-wasm-v$VERSION.zip path: pandoc-wasm-v$VERSION.zip From 4d9de2e9b7eb021b856826482c545a02583f19cd Mon Sep 17 00:00:00 2001 From: Johannes Wilm Date: Tue, 11 Feb 2025 10:11:04 +0100 Subject: [PATCH 08/67] fix filename --- .github/workflows/build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 4dd6a94..0bdf8dc 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -94,8 +94,8 @@ jobs: - name: Upload zipped file as artifact uses: actions/upload-artifact@v4 with: - name: pandoc-wasm-v$VERSION.zip - path: pandoc-wasm-v$VERSION.zip + name: pandoc-wasm-$VERSION.zip + path: pandoc-wasm-$VERSION.zip - name: Upload to release if: github.event_name == 'push' && contains(github.ref, 'refs/tags/') From 25ad35c1d63b5b59ac094d15ad9b00c8e8444aa5 Mon Sep 17 00:00:00 2001 From: Johannes Wilm Date: Tue, 11 Feb 2025 10:36:51 +0100 Subject: [PATCH 09/67] change reference to VERSION --- .github/workflows/build.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 0bdf8dc..d443115 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -89,21 +89,21 @@ jobs: - name: Zip dist folder run: | - zip -r pandoc-wasm-$VERSION.zip dist/ + zip -r pandoc-wasm-${{ VERSION }}.zip dist/ - name: Upload zipped file as artifact uses: actions/upload-artifact@v4 with: - name: pandoc-wasm-$VERSION.zip - path: pandoc-wasm-$VERSION.zip + name: pandoc-wasm-${{ VERSION }}.zip + path: pandoc-wasm-${{ VERSION }}.zip - name: Upload to release if: github.event_name == 'push' && contains(github.ref, 'refs/tags/') uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: pandoc-wasm-$VERSION.zip - asset_name: pandoc-wasm-$VERSION.zip + file: pandoc-wasm-${{ VERSION }}.zip + asset_name: pandoc-wasm-${{ VERSION }}.zip tag: ${{ github.ref }} overwrite: true From e1317d40d4ed458665f38d92bbb26df77e02bc0b Mon Sep 17 00:00:00 2001 From: Johannes Wilm Date: Tue, 11 Feb 2025 10:45:09 +0100 Subject: [PATCH 10/67] use env.VERSION --- .github/workflows/build.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index d443115..5ca27e7 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -89,21 +89,21 @@ jobs: - name: Zip dist folder run: | - zip -r pandoc-wasm-${{ VERSION }}.zip dist/ + zip -r pandoc-wasm-${{ env.VERSION }}.zip dist/ - name: Upload zipped file as artifact uses: actions/upload-artifact@v4 with: - name: pandoc-wasm-${{ VERSION }}.zip - path: pandoc-wasm-${{ VERSION }}.zip + name: pandoc-wasm-${{ env.VERSION }}.zip + path: pandoc-wasm-${{ env.VERSION }}.zip - name: Upload to release if: github.event_name == 'push' && contains(github.ref, 'refs/tags/') uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: pandoc-wasm-${{ VERSION }}.zip - asset_name: pandoc-wasm-${{ VERSION }}.zip + file: pandoc-wasm-${{ env.VERSION }}.zip + asset_name: pandoc-wasm-${{ env.VERSION }}.zip tag: ${{ github.ref }} overwrite: true From 94f0e6898de64d158fd6cb9705c4698feff01e8e Mon Sep 17 00:00:00 2001 From: Johannes Wilm Date: Tue, 11 Feb 2025 11:32:38 +0100 Subject: [PATCH 11/67] use tagged pandoc version --- .github/workflows/build.yml | 19 +++++++++++-------- version.txt | 1 + 2 files changed, 12 insertions(+), 8 deletions(-) create mode 100644 version.txt diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 5ca27e7..02e0535 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -35,11 +35,21 @@ jobs: - name: Checkout Pandoc-wasm uses: actions/checkout@v4 + - name: Extract versions from version.txt + id: extract-version + run: | + VERSION=$(cat version.txt) + echo "Extracted version: $VERSION" + PANDOC_VERSION=$(echo $VERSION | cut -d '+' -f 2) + echo "Extracted Pandoc version: $VERSION" + echo "VERSION=$VERSION" >> $GITHUB_ENV + echo "PANDOC_VERSION=$PANDOC_VERSION" >> $GITHUB_ENV + - name: Checkout Pandoc uses: actions/checkout@v4 with: repository: jgm/pandoc - ref: main + ref: ${{ env.PANDOC_VERSION }} path: pandoc - name: Patch Pandoc sources @@ -48,13 +58,6 @@ jobs: patch -p1 < ../patch/pandoc.patch cd .. - - name: Extract version from pandoc.cabal - id: extract-version - run: | - VERSION=$(grep '^version:' pandoc/pandoc.cabal | awk '{print $2}') - echo "Extracted version: $VERSION" - echo "VERSION=$VERSION" >> $GITHUB_ENV - - name: gen-plan-json run: | pushd pandoc diff --git a/version.txt b/version.txt new file mode 100644 index 0000000..e3cec99 --- /dev/null +++ b/version.txt @@ -0,0 +1 @@ +0.1+3.6.3 From a92165e3b4718328bb04685c881355b775dbba8c Mon Sep 17 00:00:00 2001 From: Johannes Wilm Date: Tue, 11 Feb 2025 11:46:54 +0100 Subject: [PATCH 12/67] reorganize github actions --- .github/workflows/build.yml | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 02e0535..a0d2250 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,19 +1,13 @@ -name: build - -on: [push, pull_request] - -permissions: - contents: write +name: Build, deploy and release jobs: build: - name: build runs-on: ubuntu-latest permissions: pages: write id-token: write + contents: write steps: - - name: setup-alex-happy run: | pushd "$(mktemp -d)" @@ -101,7 +95,7 @@ jobs: path: pandoc-wasm-${{ env.VERSION }}.zip - name: Upload to release - if: github.event_name == 'push' && contains(github.ref, 'refs/tags/') + if: ${{ github.event_name == 'release' }} uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} From 4d78291c786d55bff0a7de1e5f94c7d668aef7e6 Mon Sep 17 00:00:00 2001 From: Johannes Wilm Date: Tue, 11 Feb 2025 11:52:40 +0100 Subject: [PATCH 13/67] switch github events release type --- .github/workflows/build.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index a0d2250..c85f15b 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,7 +1,10 @@ +on: [push, pull_request, published] + name: Build, deploy and release jobs: build: + runs-on: ubuntu-latest permissions: pages: write @@ -95,7 +98,7 @@ jobs: path: pandoc-wasm-${{ env.VERSION }}.zip - name: Upload to release - if: ${{ github.event_name == 'release' }} + if: ${{ github.event_name == 'published' }} uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} From d99cc8b5dfb8740fb95666633cb4576a51407e28 Mon Sep 17 00:00:00 2001 From: Johannes Wilm Date: Tue, 11 Feb 2025 11:55:35 +0100 Subject: [PATCH 14/67] github action events change --- .github/workflows/build.yml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c85f15b..b2e589b 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,4 +1,8 @@ -on: [push, pull_request, published] +on: + push: + pull_request: + release: + types: [published] name: Build, deploy and release @@ -98,7 +102,7 @@ jobs: path: pandoc-wasm-${{ env.VERSION }}.zip - name: Upload to release - if: ${{ github.event_name == 'published' }} + if: ${{ github.event_name == 'release' }} uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} From 5245d39dcecb35b3dc8c33f667f2ce71e82c578c Mon Sep 17 00:00:00 2001 From: Johannes Wilm Date: Tue, 11 Feb 2025 11:56:14 +0100 Subject: [PATCH 15/67] 0.2+3.6.3 --- version.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.txt b/version.txt index e3cec99..183d23f 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -0.1+3.6.3 +0.2+3.6.3 From dd1429eb535960bd48b03204d10ee72f24e721b0 Mon Sep 17 00:00:00 2001 From: Johannes Wilm Date: Tue, 11 Feb 2025 13:22:05 +0100 Subject: [PATCH 16/67] Add package.json --- .github/workflows/build.yml | 35 ++++++++++++++----- {frontend => demo}/index.html | 0 {frontend => demo}/index.js | 2 +- package.json | 22 ++++++++++++ src/index.js | 65 +++++++++++++++++++++++++++++++++++ version.txt | 1 - 6 files changed, 114 insertions(+), 11 deletions(-) rename {frontend => demo}/index.html (100%) rename {frontend => demo}/index.js (99%) create mode 100644 package.json create mode 100644 src/index.js delete mode 100644 version.txt diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b2e589b..e54ae15 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -39,7 +39,7 @@ jobs: - name: Extract versions from version.txt id: extract-version run: | - VERSION=$(cat version.txt) + VERSION=$(jq -r .version package.json) echo "Extracted version: $VERSION" PANDOC_VERSION=$(echo $VERSION | cut -d '+' -f 2) echo "Extracted Pandoc version: $VERSION" @@ -84,7 +84,7 @@ jobs: run: | mkdir dist wasm-opt --low-memory-unused --converge --gufa --flatten --rereloop -Oz $(find pandoc -type f -name pandoc.wasm) -o dist/pandoc.wasm - cp frontend/*.html frontend/*.js dist + cp src/*.js dist - name: test run: | @@ -93,7 +93,7 @@ jobs: - name: Zip dist folder run: | - zip -r pandoc-wasm-${{ env.VERSION }}.zip dist/ + zip -r pandoc-wasm-${{ env.VERSION }}.zip dist/* - name: Upload zipped file as artifact uses: actions/upload-artifact@v4 @@ -101,6 +101,19 @@ jobs: name: pandoc-wasm-${{ env.VERSION }}.zip path: pandoc-wasm-${{ env.VERSION }}.zip + - name: Prepare pages + run: | + cp dist/pandoc.wasm demo/ + + - name: Upload pages + uses: actions/upload-pages-artifact@v3 + with: + path: demo + retention-days: 90 + + - name: deploy-pages + uses: actions/deploy-pages@v4 + - name: Upload to release if: ${{ github.event_name == 'release' }} uses: svenstaro/upload-release-action@v2 @@ -111,11 +124,15 @@ jobs: tag: ${{ github.ref }} overwrite: true - - name: upload-pages-artifact - uses: actions/upload-pages-artifact@v3 + - name: "Install node" + if: ${{ github.event_name == 'release' }} + uses: actions/setup-node@v3 with: - path: dist - retention-days: 90 + node-version: "22" - - name: deploy-pages - uses: actions/deploy-pages@v4 + - name: Distribute via NPM // (github packages while in PR) + if: ${{ github.event_name == 'release' }} + uses: JS-DevTools/npm-publish@v3 + with: + token: ${{ secrets.GITHUB_TOKEN }} + registry: "https://npm.pkg.github.com" diff --git a/frontend/index.html b/demo/index.html similarity index 100% rename from frontend/index.html rename to demo/index.html diff --git a/frontend/index.js b/demo/index.js similarity index 99% rename from frontend/index.js rename to demo/index.js index 008bf6d..4cdfbd7 100644 --- a/frontend/index.js +++ b/demo/index.js @@ -4,7 +4,7 @@ import { File, ConsoleStdout, PreopenDirectory, -} from "https://cdn.jsdelivr.net/npm/@bjorn3/browser_wasi_shim@0.3.0/dist/index.js"; +} from "https://cdn.jsdelivr.net/npm/@bjorn3/browser_wasi_shim@0.4.0/dist/index.js"; const args = ["pandoc.wasm", "+RTS", "-H64m", "-RTS"]; const env = []; diff --git a/package.json b/package.json new file mode 100644 index 0000000..d081f04 --- /dev/null +++ b/package.json @@ -0,0 +1,22 @@ +{ + "name": "pandoc-wasm", + "version": "0.2+3.6.3", + "description": "Pandom transpiled as WASM to be used in browsers and node.js.", + "main": "dist/index.js", + "scripts": { + "test": "echo \"Error: no test specified\" && exit 1" + }, + "repository": { + "type": "git", + "url": "github.com:johanneswilm/pandoc-wasm.git" + }, + "keywords": [ + "pandoc", + "wasm" + ], + "author": "Terrorjack", + "license": "MIT", + "dependencies": [ + "browser_wasi_shim": "^0.4.0" + ] +} diff --git a/src/index.js b/src/index.js new file mode 100644 index 0000000..6b77395 --- /dev/null +++ b/src/index.js @@ -0,0 +1,65 @@ +import { + WASI, + OpenFile, + File, + ConsoleStdout, + PreopenDirectory, +} from "browser_wasi_shim"; + +const args = ["pandoc.wasm", "+RTS", "-H64m", "-RTS"]; +const env = []; +const in_file = new File(new Uint8Array(), { readonly: true }); +const out_file = new File(new Uint8Array(), { readonly: false }); +const fds = [ + new OpenFile(new File(new Uint8Array(), { readonly: true })), + ConsoleStdout.lineBuffered((msg) => console.log(`[WASI stdout] ${msg}`)), + ConsoleStdout.lineBuffered((msg) => console.warn(`[WASI stderr] ${msg}`)), + new PreopenDirectory("/", [ + ["in", in_file], + ["out", out_file], + ]), +]; +const options = { debug: false }; +const wasi = new WASI(args, env, fds, options); +const { instance } = await WebAssembly.instantiateStreaming( + fetch("./pandoc.wasm"), + { + wasi_snapshot_preview1: wasi.wasiImport, + } +); + +wasi.initialize(instance); +instance.exports.__wasm_call_ctors(); + +function memory_data_view() { + return new DataView(instance.exports.memory.buffer); +} + +const argc_ptr = instance.exports.malloc(4); +memory_data_view().setUint32(argc_ptr, args.length, true); +const argv = instance.exports.malloc(4 * (args.length + 1)); +for (let i = 0; i < args.length; ++i) { + const arg = instance.exports.malloc(args[i].length + 1); + new TextEncoder().encodeInto( + args[i], + new Uint8Array(instance.exports.memory.buffer, arg, args[i].length) + ); + memory_data_view().setUint8(arg + args[i].length, 0); + memory_data_view().setUint32(argv + 4 * i, arg, true); +} +memory_data_view().setUint32(argv + 4 * args.length, 0, true); +const argv_ptr = instance.exports.malloc(4); +memory_data_view().setUint32(argv_ptr, argv, true); + +instance.exports.hs_init_with_rtsopts(argc_ptr, argv_ptr); + +export function pandoc(args_str, in_str) { + const args_ptr = instance.exports.malloc(args_str.length); + new TextEncoder().encodeInto( + args_str, + new Uint8Array(instance.exports.memory.buffer, args_ptr, args_str.length) + ); + in_file.data = new TextEncoder().encode(in_str); + instance.exports.wasm_main(args_ptr, args_str.length); + return new TextDecoder("utf-8", { fatal: true }).decode(out_file.data); +} diff --git a/version.txt b/version.txt deleted file mode 100644 index 183d23f..0000000 --- a/version.txt +++ /dev/null @@ -1 +0,0 @@ -0.2+3.6.3 From 59f7aebbf3bb7a486a0c341b7649c4967658e456 Mon Sep 17 00:00:00 2001 From: Johannes Wilm Date: Tue, 11 Feb 2025 13:30:05 +0100 Subject: [PATCH 17/67] fix package.json syntax --- package.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/package.json b/package.json index d081f04..282259d 100644 --- a/package.json +++ b/package.json @@ -16,7 +16,7 @@ ], "author": "Terrorjack", "license": "MIT", - "dependencies": [ + "dependencies": { "browser_wasi_shim": "^0.4.0" - ] + } } From c59abb71d844b185155515a31d58c3680b3ca761 Mon Sep 17 00:00:00 2001 From: Johannes Wilm Date: Tue, 11 Feb 2025 13:32:50 +0100 Subject: [PATCH 18/67] 0.3+3.6.3 --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 282259d..b3e954c 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "pandoc-wasm", - "version": "0.2+3.6.3", + "version": "0.3+3.6.3", "description": "Pandom transpiled as WASM to be used in browsers and node.js.", "main": "dist/index.js", "scripts": { From 5632d85fdbdd4ca14a3e594cf49a7836860c0249 Mon Sep 17 00:00:00 2001 From: Johannes Wilm Date: Tue, 11 Feb 2025 13:58:36 +0100 Subject: [PATCH 19/67] reorganize zipping --- .github/workflows/build.yml | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index e54ae15..7a733bf 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -91,16 +91,6 @@ jobs: wasmtime run --dir $PWD::/ -- dist/pandoc.wasm pandoc/README.md -o pandoc/README.rst head --lines=20 pandoc/README.rst - - name: Zip dist folder - run: | - zip -r pandoc-wasm-${{ env.VERSION }}.zip dist/* - - - name: Upload zipped file as artifact - uses: actions/upload-artifact@v4 - with: - name: pandoc-wasm-${{ env.VERSION }}.zip - path: pandoc-wasm-${{ env.VERSION }}.zip - - name: Prepare pages run: | cp dist/pandoc.wasm demo/ @@ -114,6 +104,19 @@ jobs: - name: deploy-pages uses: actions/deploy-pages@v4 + - name: Upload as build artifact + uses: actions/upload-artifact@v4 + with: + name: pandoc-wasm-${{ env.VERSION }} + path: dist + + - name: Zip dist folder + if: ${{ github.event_name == 'release' }} + run: | + cd dist + zip -r ../pandoc-wasm-${{ env.VERSION }}.zip . + cd .. + - name: Upload to release if: ${{ github.event_name == 'release' }} uses: svenstaro/upload-release-action@v2 From 23ce56a80176a679c0a05b9b2c10454f6bf778cc Mon Sep 17 00:00:00 2001 From: Johannes Wilm Date: Tue, 11 Feb 2025 14:09:42 +0100 Subject: [PATCH 20/67] Use semver (requirement by npm) --- .github/workflows/build.yml | 2 +- package.json | 2 +- pandoc-version.txt | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 pandoc-version.txt diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 7a733bf..b072175 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -41,7 +41,7 @@ jobs: run: | VERSION=$(jq -r .version package.json) echo "Extracted version: $VERSION" - PANDOC_VERSION=$(echo $VERSION | cut -d '+' -f 2) + PANDOC_VERSION=$(cat pandoc-version.txt) echo "Extracted Pandoc version: $VERSION" echo "VERSION=$VERSION" >> $GITHUB_ENV echo "PANDOC_VERSION=$PANDOC_VERSION" >> $GITHUB_ENV diff --git a/package.json b/package.json index b3e954c..09fa6ee 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "pandoc-wasm", - "version": "0.3+3.6.3", + "version": "0.4.0", "description": "Pandom transpiled as WASM to be used in browsers and node.js.", "main": "dist/index.js", "scripts": { diff --git a/pandoc-version.txt b/pandoc-version.txt new file mode 100644 index 0000000..4a788a0 --- /dev/null +++ b/pandoc-version.txt @@ -0,0 +1 @@ +3.6.3 From ac6efee4336f6de650beee84311aa1bd1d9569b1 Mon Sep 17 00:00:00 2001 From: Johannes Wilm Date: Tue, 11 Feb 2025 14:39:30 +0100 Subject: [PATCH 21/67] set up for distribution via npm --- .github/workflows/build.yml | 11 ++++++++--- package.json | 4 ++-- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b072175..c074c0c 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -133,9 +133,14 @@ jobs: with: node-version: "22" - - name: Distribute via NPM // (github packages while in PR) + - name: Add package.json + if: ${{ github.event_name == 'release' }} + run: | + cp package.json dist/ + + - name: Distribute via NPM if: ${{ github.event_name == 'release' }} uses: JS-DevTools/npm-publish@v3 with: - token: ${{ secrets.GITHUB_TOKEN }} - registry: "https://npm.pkg.github.com" + package: "dist" + token: ${{ secrets.NPM_TOKEN }} diff --git a/package.json b/package.json index 09fa6ee..b07e132 100644 --- a/package.json +++ b/package.json @@ -1,8 +1,8 @@ { "name": "pandoc-wasm", "version": "0.4.0", - "description": "Pandom transpiled as WASM to be used in browsers and node.js.", - "main": "dist/index.js", + "description": "Pandoc transpiled as WASM to be used in browsers.", + "main": "index.js", "scripts": { "test": "echo \"Error: no test specified\" && exit 1" }, From 6cf1ed21856d8169e78883e2c99434be457daf11 Mon Sep 17 00:00:00 2001 From: Johannes Wilm Date: Thu, 13 Feb 2025 14:56:13 +0100 Subject: [PATCH 22/67] Add maintainership. make exports work, add example to readme. --- .github/workflows/build.yml | 8 +- README.md | 79 ++++++++++------ package.json | 2 +- src/index.js | 182 ++++++++++++++++++++++++------------ 4 files changed, 177 insertions(+), 94 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c074c0c..64a73d8 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -55,9 +55,9 @@ jobs: - name: Patch Pandoc sources run: | - cd pandoc + pushd pandoc patch -p1 < ../patch/pandoc.patch - cd .. + popd - name: gen-plan-json run: | @@ -113,9 +113,9 @@ jobs: - name: Zip dist folder if: ${{ github.event_name == 'release' }} run: | - cd dist + pushd dist zip -r ../pandoc-wasm-${{ env.VERSION }}.zip . - cd .. + popd - name: Upload to release if: ${{ github.event_name == 'release' }} diff --git a/README.md b/README.md index 55b6e4f..d492300 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,7 @@ # `pandoc-wasm` -*Fork for private experimentation.* +** In search of maintainer:** I have temporarily taken over maintainership of this package due to there being no package on NPM. However, I know very little about wasm and haskell and would like for someone else to take this package again. (@Terrorjack ?) -[![Chat on Matrix](https://matrix.to/img/matrix-badge.svg)](https://matrix.to/#/#haskell-wasm:matrix.terrorjack.com) The latest version of `pandoc` CLI compiled as a standalone `wasm32-wasi` module that can be run by engines like `wasmtime` as @@ -14,35 +13,58 @@ Stdin on the left, stdout on the right, command line arguments at the bottom. No convert button, output is produced dynamically as input changes. -You're also more than welcome to fetch the -[`pandoc.wasm`](https://johanneswilm.github.io/pandoc-wasm/pandoc.wasm) -module and make your own customized app. `pandoc.wasm` is fully -`wasm32-wasi` compliant and doesn't make use of any JSFFI feature in -the ghc wasm backend. -## Building +## To use -`pandoc.wasm` is built with 9.12 flavour of ghc wasm backend in CI, -which can be installed via -[`ghc-wasm-meta`](https://gitlab.haskell.org/haskell-wasm/ghc-wasm-meta). You -need at least 9.10 since it's the earliest major version with (my -non-official) backports for ghc wasm backend's Template Haskell & ghci -support. +1. Make `pandoc-wasm` a dependency in your project.json. -It's build-method is based on this -[fork](https://github.com/haskell-wasm/pandoc/tree/wasm) which is -based on latest `pandoc` release and patches dependencies, cabal -config as well as some module code to make things compilable to wasm: +2. In your bundler mark "wasm" as an asset/resource. For example in rspack, in your config file: -- No http client/server functionality. `wasip1` doesn't have proper - sockets support anyway, and support for future versions of wasi is - not on my radar for now. -- No lua support. lua requires `setjmp`/`longjmp` which already work - in `wasi-libc` to some extent, but that requires wasm exception - handling feature which is not supported by `wasmtime` yet. +```js +module.exports = { + ... + module: { + ... + rules: [ + ... + { + test: /\.(wasm)$/, + type: "asset/resource" + } + ... + ] + ... + } + ... +} +``` -Other functionalities should just work, if not feel free to file a bug -report :) +3. Import `pandoc` from `pandoc-wasm` like this: + +```js +import { pandoc } from "pandoc-wasm" +``` + +4. Execute it like this (it's async): + +```js +const output = await pandoc( + '-s -f json -t markdown', // command line switches + inputFileContents, // string for text formats or blob for binary formats + [ // Additional files - for example bibliography or images + { + filename: 'image13.png', + contents: ..., // string for text formats or blob for binary formats + }, + ... + ] +) +``` + +The output will either be a string (for text formats) or a blob for binary formats. + + +**TODO:** Obtain extracted media files. I know too little about wasm to figure out how to get them out. ## Acknowledgements @@ -50,9 +72,10 @@ Thanks to John MacFarlane and all the contributors who made `pandoc` possible: a fantastic tool that has benefited many developers and is a source of pride for the Haskell community! -Thanks to all past efforts of using `asterius` to compile `pandoc` to -wasm, including but not limited to: +Thanks to all efforts to make `pandoc` run with wasm, including but not limited to: +- amesgen [`Don't patch out network`](https://github.com/haskell-wasm/pandoc/pull/1) +- Cheng Shao [`pandoc-wasm`](https://github.com/tweag/pandoc-wasm) - George Stagg's [`pandoc-wasm`](https://github.com/georgestagg/pandoc-wasm) - Yuto Takahashi's [`wasm-pandoc`](https://github.com/y-taka-23/wasm-pandoc) - My legacy asterius pandoc [demo](https://asterius.netlify.app/demo/pandoc/pandoc.html) diff --git a/package.json b/package.json index b07e132..b4e06cf 100644 --- a/package.json +++ b/package.json @@ -14,7 +14,7 @@ "pandoc", "wasm" ], - "author": "Terrorjack", + "author": "Johannes Wilm", "license": "MIT", "dependencies": { "browser_wasi_shim": "^0.4.0" diff --git a/src/index.js b/src/index.js index 6b77395..b899735 100644 --- a/src/index.js +++ b/src/index.js @@ -1,65 +1,125 @@ import { - WASI, - OpenFile, - File, - ConsoleStdout, - PreopenDirectory, -} from "browser_wasi_shim"; - -const args = ["pandoc.wasm", "+RTS", "-H64m", "-RTS"]; -const env = []; -const in_file = new File(new Uint8Array(), { readonly: true }); -const out_file = new File(new Uint8Array(), { readonly: false }); -const fds = [ - new OpenFile(new File(new Uint8Array(), { readonly: true })), - ConsoleStdout.lineBuffered((msg) => console.log(`[WASI stdout] ${msg}`)), - ConsoleStdout.lineBuffered((msg) => console.warn(`[WASI stderr] ${msg}`)), - new PreopenDirectory("/", [ - ["in", in_file], - ["out", out_file], - ]), -]; -const options = { debug: false }; -const wasi = new WASI(args, env, fds, options); -const { instance } = await WebAssembly.instantiateStreaming( - fetch("./pandoc.wasm"), - { - wasi_snapshot_preview1: wasi.wasiImport, - } -); - -wasi.initialize(instance); -instance.exports.__wasm_call_ctors(); - -function memory_data_view() { - return new DataView(instance.exports.memory.buffer); -} + WASI, + OpenFile, + File, + ConsoleStdout, + PreopenDirectory, +} from "@bjorn3/browser_wasi_shim"; +import pandocWasmLocation from "./pandoc.wasm" + +const pandocWasmFetch = await fetch(pandocWasmLocation) +const pandocWasm = await pandocWasmFetch.arrayBuffer() +const args = ["pandoc.wasm", "+RTS", "-H64m", "-RTS"] +const env = [] +const inFile = new File(new Uint8Array(), { + readonly: true +}) +const outFile = new File(new Uint8Array(), { + readonly: false +}) + + +async function toUint8Array(inData) { + let uint8Array; + + if (typeof inData === 'string') { + // If inData is a text string, convert it to a Uint8Array + const encoder = new TextEncoder() + uint8Array = encoder.encode(inData) + } else if (inData instanceof Blob) { + // If inData is a Blob, read it as an ArrayBuffer and then convert to Uint8Array + const arrayBuffer = await inData.arrayBuffer() + uint8Array = new Uint8Array(arrayBuffer) + } else { + throw new Error('Unsupported type: inData must be a string or a Blob') + } -const argc_ptr = instance.exports.malloc(4); -memory_data_view().setUint32(argc_ptr, args.length, true); -const argv = instance.exports.malloc(4 * (args.length + 1)); -for (let i = 0; i < args.length; ++i) { - const arg = instance.exports.malloc(args[i].length + 1); - new TextEncoder().encodeInto( - args[i], - new Uint8Array(instance.exports.memory.buffer, arg, args[i].length) - ); - memory_data_view().setUint8(arg + args[i].length, 0); - memory_data_view().setUint32(argv + 4 * i, arg, true); + return uint8Array } -memory_data_view().setUint32(argv + 4 * args.length, 0, true); -const argv_ptr = instance.exports.malloc(4); -memory_data_view().setUint32(argv_ptr, argv, true); - -instance.exports.hs_init_with_rtsopts(argc_ptr, argv_ptr); - -export function pandoc(args_str, in_str) { - const args_ptr = instance.exports.malloc(args_str.length); - new TextEncoder().encodeInto( - args_str, - new Uint8Array(instance.exports.memory.buffer, args_ptr, args_str.length) - ); - in_file.data = new TextEncoder().encode(in_str); - instance.exports.wasm_main(args_ptr, args_str.length); - return new TextDecoder("utf-8", { fatal: true }).decode(out_file.data); + + + +export async function pandoc(args_str, inData, resources = []) { + + const files = [ + ["in", inFile], + ["out", outFile], + ] + + for await(const resource of resources) { + const contents = await toUint8Array(resource.contents) + files.push([resource.filename, new File(contents, { + readonly: true + })]) + } + + const fds = [ + new OpenFile(new File(new Uint8Array(), { + readonly: true + })), + ConsoleStdout.lineBuffered((msg) => console.log(`[WASI stdout] ${msg}`)), + ConsoleStdout.lineBuffered((msg) => console.warn(`[WASI stderr] ${msg}`)), + new PreopenDirectory("/", files), + ] + const options = { + debug: false + } + const wasi = new WASI(args, env, fds, options) + + const { + instance + } = await WebAssembly.instantiate( + pandocWasm, { + wasi_snapshot_preview1: wasi.wasiImport, + } + ) + + wasi.initialize(instance) + instance.exports.__wasm_call_ctors() + + function memory_data_view() { + return new DataView(instance.exports.memory.buffer) + } + + const argc_ptr = instance.exports.malloc(4) + memory_data_view().setUint32(argc_ptr, args.length, true) + const argv = instance.exports.malloc(4 * (args.length + 1)) + for (let i = 0; i < args.length; ++i) { + const arg = instance.exports.malloc(args[i].length + 1) + new TextEncoder().encodeInto( + args[i], + new Uint8Array(instance.exports.memory.buffer, arg, args[i].length) + ) + memory_data_view().setUint8(arg + args[i].length, 0) + memory_data_view().setUint32(argv + 4 * i, arg, true) + } + memory_data_view().setUint32(argv + 4 * args.length, 0, true) + const argv_ptr = instance.exports.malloc(4) + memory_data_view().setUint32(argv_ptr, argv, true) + + instance.exports.hs_init_with_rtsopts(argc_ptr, argv_ptr) + + const args_ptr = instance.exports.malloc(args_str.length) + new TextEncoder().encodeInto( + args_str, + new Uint8Array(instance.exports.memory.buffer, args_ptr, args_str.length) + ) + + inFile.data = await toUint8Array(inData) + + instance.exports.wasm_main(args_ptr, args_str.length) + let outData + try { + // Attempt to decode the data as UTF-8 text + const textDecoder = new TextDecoder("utf-8", { + fatal: true + }) + // Return as string if successful + outData = textDecoder.decode(outFile.data) + } catch (e) { + // If decoding fails, assume it's binary data and return as Blob + outData = new Blob([outFile.data]) + } + + return outData } From 055a9dca7bbfe591ec6d6531e72b29716958c30e Mon Sep 17 00:00:00 2001 From: Johannes Wilm Date: Thu, 13 Feb 2025 14:56:35 +0100 Subject: [PATCH 23/67] lint --- src/index.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/index.js b/src/index.js index b899735..8d5238f 100644 --- a/src/index.js +++ b/src/index.js @@ -20,7 +20,7 @@ const outFile = new File(new Uint8Array(), { async function toUint8Array(inData) { - let uint8Array; + let uint8Array if (typeof inData === 'string') { // If inData is a text string, convert it to a Uint8Array From 96ec06d1193b737ff01fa1cfa34cfa68a7efea43 Mon Sep 17 00:00:00 2001 From: Johannes Wilm Date: Thu, 13 Feb 2025 14:57:25 +0100 Subject: [PATCH 24/67] lint --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d492300..16056ae 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # `pandoc-wasm` -** In search of maintainer:** I have temporarily taken over maintainership of this package due to there being no package on NPM. However, I know very little about wasm and haskell and would like for someone else to take this package again. (@Terrorjack ?) +**In search of maintainer:** I have temporarily taken over maintainership of this package due to there being no package on NPM. However, I know very little about wasm and haskell and would like for someone else to take this package again. (@Terrorjack ?) The latest version of `pandoc` CLI compiled as a standalone From e1f914db5c0f2015f0c0a28373198cc5c532a877 Mon Sep 17 00:00:00 2001 From: Johannes Wilm Date: Thu, 13 Feb 2025 18:02:17 +0100 Subject: [PATCH 25/67] release to releases branch (while waiting for npm access) --- .github/workflows/build.yml | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 64a73d8..8cc3539 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -138,9 +138,18 @@ jobs: run: | cp package.json dist/ - - name: Distribute via NPM + - name: Deploy if: ${{ github.event_name == 'release' }} - uses: JS-DevTools/npm-publish@v3 - with: - package: "dist" - token: ${{ secrets.NPM_TOKEN }} + uses: s0/git-publish-subdir-action@develop + env: + REPO: self + BRANCH: releases + FOLDER: dist + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + # - name: Distribute via NPM + # if: ${{ github.event_name == 'release' }} + # uses: JS-DevTools/npm-publish@v3 + # with: + # package: "dist" + # token: ${{ secrets.NPM_TOKEN }} From e4062a44cf418eb626768920e91abb24a7d55de9 Mon Sep 17 00:00:00 2001 From: Johannes Wilm Date: Thu, 13 Feb 2025 18:03:39 +0100 Subject: [PATCH 26/67] 0.5.1 --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index b4e06cf..09810d5 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "pandoc-wasm", - "version": "0.4.0", + "version": "0.5.1", "description": "Pandoc transpiled as WASM to be used in browsers.", "main": "index.js", "scripts": { From ecdf7cb0cccdab8b1a532b4e9f7bc9263a257e58 Mon Sep 17 00:00:00 2001 From: Johannes Wilm Date: Thu, 13 Feb 2025 19:21:03 +0100 Subject: [PATCH 27/67] Add mediafile output --- README.md | 11 +++++++--- src/index.js | 59 +++++++++++++++++++++++++++++++++++++++------------- 2 files changed, 52 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 16056ae..0eba22c 100644 --- a/README.md +++ b/README.md @@ -59,12 +59,17 @@ const output = await pandoc( ... ] ) -``` -The output will either be a string (for text formats) or a blob for binary formats. +console.log(output) + +{ + out: '...', + mediaFiles: Map {'media': Map {'image1.jpg' => Blob, 'image2.png' => Blob, ...}} +} +``` -**TODO:** Obtain extracted media files. I know too little about wasm to figure out how to get them out. +`out` will either be a string (for text formats) or a blob for binary formats of the main output. `mediaFiles` will be a map of all additional dirs/files that pandoc has created during the process. ## Acknowledgements diff --git a/src/index.js b/src/index.js index 8d5238f..fdf6cd0 100644 --- a/src/index.js +++ b/src/index.js @@ -20,7 +20,7 @@ const outFile = new File(new Uint8Array(), { async function toUint8Array(inData) { - let uint8Array + let uint8Array; if (typeof inData === 'string') { // If inData is a text string, convert it to a Uint8Array @@ -37,7 +37,38 @@ async function toUint8Array(inData) { return uint8Array } +const textDecoder = new TextDecoder("utf-8", { + fatal: true +}) +function convertData(data) { + let outData + try { + // Attempt to decode the data as UTF-8 text + // Return as string if successful + outData = textDecoder.decode(data) + } catch (e) { + // If decoding fails, assume it's binary data and return as Blob + outData = new Blob([data]) + } + return outData +} + +function convertItem(name, value) { + if (value.contents) { + // directory + return [ + name, + new Map([...value.contents].map(([name, value]) => convertItem(name, value))) + ] + } else if (value.data) { + // file + return [ + name, + convertData(value.data) + ] + } +} export async function pandoc(args_str, inData, resources = []) { @@ -53,13 +84,15 @@ export async function pandoc(args_str, inData, resources = []) { })]) } + const rootDir = new PreopenDirectory("/", files) + const fds = [ new OpenFile(new File(new Uint8Array(), { readonly: true })), ConsoleStdout.lineBuffered((msg) => console.log(`[WASI stdout] ${msg}`)), ConsoleStdout.lineBuffered((msg) => console.warn(`[WASI stderr] ${msg}`)), - new PreopenDirectory("/", files), + rootDir, ] const options = { debug: false @@ -108,18 +141,14 @@ export async function pandoc(args_str, inData, resources = []) { inFile.data = await toUint8Array(inData) instance.exports.wasm_main(args_ptr, args_str.length) - let outData - try { - // Attempt to decode the data as UTF-8 text - const textDecoder = new TextDecoder("utf-8", { - fatal: true - }) - // Return as string if successful - outData = textDecoder.decode(outFile.data) - } catch (e) { - // If decoding fails, assume it's binary data and return as Blob - outData = new Blob([outFile.data]) - } - return outData + // Find any generated media files + + const knownFileNames = ["in", "out"].concat(resources.map(resource => resource.filename)) + const mediaFiles = new Map([...rootDir.dir.contents].filter(([name, _value]) => !knownFileNames.includes(name)).map(([name, value]) => convertItem(name, value))) + + return { + out: convertData(outFile.data), + mediaFiles + } } From ecf8fa9fbbb261c9bd1397a3fa1a3ba06c4e1f9e Mon Sep 17 00:00:00 2001 From: Johannes Wilm Date: Thu, 13 Feb 2025 19:22:16 +0100 Subject: [PATCH 28/67] 0.6.0 --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 09810d5..070b04a 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "pandoc-wasm", - "version": "0.5.1", + "version": "0.6.0", "description": "Pandoc transpiled as WASM to be used in browsers.", "main": "index.js", "scripts": { From 58f19c3c2bdb7fc6566cf3aff568c5fa38d69b10 Mon Sep 17 00:00:00 2001 From: Johannes Wilm Date: Thu, 13 Feb 2025 22:14:16 +0100 Subject: [PATCH 29/67] correct dependency --- README.md | 4 +++- package.json | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 0eba22c..7a14f9c 100644 --- a/README.md +++ b/README.md @@ -69,7 +69,9 @@ console.log(output) ``` -`out` will either be a string (for text formats) or a blob for binary formats of the main output. `mediaFiles` will be a map of all additional dirs/files that pandoc has created during the process. +`out` will either be a string (for text formats) or a Blob for binary formats of the main output. `mediaFiles` will be a map of all additional dirs/files that pandoc has created during the process. + + ## Acknowledgements diff --git a/package.json b/package.json index 070b04a..fb5b133 100644 --- a/package.json +++ b/package.json @@ -17,6 +17,6 @@ "author": "Johannes Wilm", "license": "MIT", "dependencies": { - "browser_wasi_shim": "^0.4.0" + "@bjorn3/browser_wasi_shim": "^0.4.0" } } From 45fa1448e0f30e04a48da92ab19ae7c09c50381f Mon Sep 17 00:00:00 2001 From: Johannes Wilm Date: Thu, 13 Feb 2025 22:14:38 +0100 Subject: [PATCH 30/67] 0.6.1 --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index fb5b133..13e19a8 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "pandoc-wasm", - "version": "0.6.0", + "version": "0.6.1", "description": "Pandoc transpiled as WASM to be used in browsers.", "main": "index.js", "scripts": { From b743e09bf146be8a0ae933b1b0d80ffbe0491951 Mon Sep 17 00:00:00 2001 From: Johannes Wilm Date: Thu, 27 Feb 2025 09:18:22 +0100 Subject: [PATCH 31/67] update URLs --- .github/workflows/build.yml | 22 +++++++++++----------- README.md | 19 +++++++++---------- demo/index.html | 2 +- package.json | 4 ++-- 4 files changed, 23 insertions(+), 24 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 8cc3539..64667b9 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -33,7 +33,7 @@ jobs: ~/.ghc-wasm/add_to_github_path.sh popd - - name: Checkout Pandoc-wasm + - name: Checkout wasm-pandoc uses: actions/checkout@v4 - name: Extract versions from version.txt @@ -107,14 +107,14 @@ jobs: - name: Upload as build artifact uses: actions/upload-artifact@v4 with: - name: pandoc-wasm-${{ env.VERSION }} + name: wasm-pandoc-${{ env.VERSION }} path: dist - name: Zip dist folder if: ${{ github.event_name == 'release' }} run: | pushd dist - zip -r ../pandoc-wasm-${{ env.VERSION }}.zip . + zip -r ../wasm-pandoc-${{ env.VERSION }}.zip . popd - name: Upload to release @@ -122,8 +122,8 @@ jobs: uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: pandoc-wasm-${{ env.VERSION }}.zip - asset_name: pandoc-wasm-${{ env.VERSION }}.zip + file: wasm-pandoc-${{ env.VERSION }}.zip + asset_name: wasm-pandoc-${{ env.VERSION }}.zip tag: ${{ github.ref }} overwrite: true @@ -147,9 +147,9 @@ jobs: FOLDER: dist GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # - name: Distribute via NPM - # if: ${{ github.event_name == 'release' }} - # uses: JS-DevTools/npm-publish@v3 - # with: - # package: "dist" - # token: ${{ secrets.NPM_TOKEN }} + - name: Distribute via NPM + if: ${{ github.event_name == 'release' }} + uses: JS-DevTools/npm-publish@v3 + with: + package: "dist" + token: ${{ secrets.NPM_TOKEN }} diff --git a/README.md b/README.md index 7a14f9c..1eb54bf 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,12 @@ -# `pandoc-wasm` +# `wasm-pandoc` -**In search of maintainer:** I have temporarily taken over maintainership of this package due to there being no package on NPM. However, I know very little about wasm and haskell and would like for someone else to take this package again. (@Terrorjack ?) +**Looking for maintainer:** Johannes Wilm has temporarily taken over maintainership of this package due to there being no package on NPM. +However, he knows very little about wasm and haskell and would like for someone else to take this package again. -The latest version of `pandoc` CLI compiled as a standalone -`wasm32-wasi` module that can be run by engines like `wasmtime` as -well as browsers. +The latest version of `pandoc` CLI compiled as a standalone `wasm32-wasi` module that can be run by browsers. -## [Live demo](https://johanneswilm.github.io/pandoc-wasm) +## [Live demo](https://fiduswriter.github.io/wasm-pandoc) Stdin on the left, stdout on the right, command line arguments at the bottom. No convert button, output is produced dynamically as input @@ -16,7 +15,7 @@ changes. ## To use -1. Make `pandoc-wasm` a dependency in your project.json. +1. Make `wasm-pandoc` a dependency in your project.json. 2. In your bundler mark "wasm" as an asset/resource. For example in rspack, in your config file: @@ -39,10 +38,10 @@ module.exports = { } ``` -3. Import `pandoc` from `pandoc-wasm` like this: +3. Import `pandoc` from `wasm-pandoc` like this: ```js -import { pandoc } from "pandoc-wasm" +import { pandoc } from "wasm-pandoc" ``` 4. Execute it like this (it's async): @@ -85,4 +84,4 @@ Thanks to all efforts to make `pandoc` run with wasm, including but not limited - Cheng Shao [`pandoc-wasm`](https://github.com/tweag/pandoc-wasm) - George Stagg's [`pandoc-wasm`](https://github.com/georgestagg/pandoc-wasm) - Yuto Takahashi's [`wasm-pandoc`](https://github.com/y-taka-23/wasm-pandoc) -- My legacy asterius pandoc [demo](https://asterius.netlify.app/demo/pandoc/pandoc.html) +- TerrorJack's asterius pandoc [demo](https://asterius.netlify.app/demo/pandoc/pandoc.html) diff --git a/demo/index.html b/demo/index.html index 7e710ec..0944228 100644 --- a/demo/index.html +++ b/demo/index.html @@ -3,7 +3,7 @@ - pandoc-wasm playground + wasm-pandoc playground