```
Successfully initialize PIE grid solver with openmp backend
# of vars: 17227
Iter 5000, abs error [ 5.187172 6.701462 11.020264]
Successfully write image to result.jpg
```

### Parallelization Strategy
For [EquSolver](https://github.com/Trinkle23897/Fast-Poisson-Image-Editing/blob/main/fpie/core/openmp/equ.cc), it first splits the pixels into two groups by the parity of `(i+j)`, then parallelizes the per-pixel iteration within each group at every step. This strategy makes good use of thread-local memory access.
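As a rough illustration of this red-black scheme, one sweep might look like the following. This is a minimal single-channel sketch under an assumed row-major layout, not the actual fpie kernel:

```cpp
#include <omp.h>
#include <vector>

// One red-black sweep over the interior of an h x w image stored row-major.
// Pixels with (i + j) % 2 == parity have no neighbors inside the same group,
// so the whole group can be updated in place in parallel.
void red_black_step(std::vector<float> &x, const std::vector<float> &b,
                    int h, int w) {
  for (int parity = 0; parity < 2; ++parity) {
#pragma omp parallel for schedule(static)
    for (int i = 1; i < h - 1; ++i) {
      // start at the first column j >= 1 with (i + j) % 2 == parity
      for (int j = 2 - (i + parity) % 2; j < w - 1; j += 2) {
        int id = i * w + j;
        x[id] = (b[id] + x[id - w] + x[id + w] + x[id - 1] + x[id + 1]) / 4.0f;
      }
    }
  }
}
```

Because all updates inside one parity group are independent, each thread only reads values written in the previous half-step, which is what makes the in-place parallel update safe.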
For [GridSolver](https://github.com/Trinkle23897/Fast-Poisson-Image-Editing/blob/main/fpie/core/openmp/grid.cc), it parallelizes the per-grid iteration in each step, where the grid size is `(grid_x, grid_y)`. Each thread simply iterates over all pixels in its grid.
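A comparable sketch of the per-grid scheduling (again a simplified single-channel version with assumed names, not the repository's code):

```cpp
#include <omp.h>
#include <algorithm>
#include <vector>

// One sweep in which each OpenMP iteration handles a whole (grid_x, grid_y)
// tile and walks every interior pixel inside it.  Tile borders may read a mix
// of old and new neighbor values, as in an asynchronous Jacobi sweep.
void grid_step(std::vector<float> &x, const std::vector<float> &b,
               int h, int w, int grid_x, int grid_y) {
  int tiles_i = (h + grid_x - 1) / grid_x;
  int tiles_j = (w + grid_y - 1) / grid_y;
#pragma omp parallel for collapse(2) schedule(static)
  for (int ti = 0; ti < tiles_i; ++ti) {
    for (int tj = 0; tj < tiles_j; ++tj) {
      int i_end = std::min((ti + 1) * grid_x, h - 1);
      int j_end = std::min((tj + 1) * grid_y, w - 1);
      for (int i = std::max(ti * grid_x, 1); i < i_end; ++i) {
        for (int j = std::max(tj * grid_y, 1); j < j_end; ++j) {
          int id = i * w + j;
          x[id] = (b[id] + x[id - w] + x[id + w] + x[id - 1] + x[id + 1]) / 4.0f;
        }
      }
    }
  }
}
```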
## MPI
To run with the MPI backend, you need to install both `mpicc` and `mpi4py` (`pip install mpi4py`).
Different from the other backends, you need to use `mpiexec` or `mpirun` to launch the MPI service instead of calling the `fpie` program directly. The `-np` option specifies the number of processes to launch.
Apart from that, you need to specify the synchronization interval for the MPI backend with `--mpi-sync-interval`. If this number is too small, synchronization overhead dominates the runtime; if it is too large, the quality of the solution drops dramatically.
MPI EquSolver and GridSolver take no other arguments because of the parallelization strategy we use; see the next section.
```
Successfully initialize PIE grid solver with mpi backend
# of vars: 17227
Iter 5000, abs error [204.41124 215.00548 296.4441 ]
Successfully write image to result.jpg
```

### Parallelization Strategy

MPI cannot use the shared-memory programming model, so we need to reduce the amount of data exchanged between processes. Each process is responsible for only a part of the computation and synchronizes with the other processes every `mpi_sync_interval` steps.
For [EquSolver](https://github.com/Trinkle23897/Fast-Poisson-Image-Editing/blob/main/fpie/core/mpi/equ.cc), it is hard to say which part of the data should be exchanged with which process, since it relabels all pixels at the very beginning. We therefore use `MPI_Bcast` to force a synchronization of all data.
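A hedged sketch of what such a broadcast-style synchronization can look like; the per-rank slice layout and names here are assumptions, not the actual fpie code:

```cpp
#include <mpi.h>
#include <vector>

// At every sync step, each rank re-broadcasts the slice of unknowns it owns
// so that all ranks continue from the same solution vector.
void sync_all(std::vector<float> &x, int n_ranks, MPI_Comm comm) {
  int n = static_cast<int>(x.size());
  int chunk = n / n_ranks;
  for (int owner = 0; owner < n_ranks; ++owner) {
    int begin = owner * chunk;
    int count = (owner == n_ranks - 1) ? n - begin : chunk;
    MPI_Bcast(x.data() + begin, count, MPI_FLOAT, owner, comm);
  }
}
```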
For [GridSolver](https://github.com/Trinkle23897/Fast-Poisson-Image-Editing/blob/main/fpie/core/mpi/grid.cc), we use a line partition: process `i` exchanges its first and last lines with processes `i-1` and `i+1`, respectively. The exchanged rows are contiguous in memory, so this strategy has less overhead than a block partition.
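A minimal sketch of such a line-partition (halo) exchange, assuming each rank stores its rows plus one ghost row above and below; names and layout are illustrative, not the actual fpie code:

```cpp
#include <mpi.h>
#include <vector>

// Each rank holds `local_rows` rows of width `w` plus one ghost row above and
// below.  It swaps its first/last real row with rank-1 / rank+1 so that border
// pixels always see the neighboring rank's latest line.
void exchange_halo(std::vector<float> &x, int local_rows, int w,
                   int rank, int size, MPI_Comm comm) {
  float *first_row = x.data() + w;                      // first real row
  float *last_row  = x.data() + local_rows * w;         // last real row
  float *top_ghost = x.data();                          // ghost row above
  float *bot_ghost = x.data() + (local_rows + 1) * w;   // ghost row below
  int up   = (rank > 0) ? rank - 1 : MPI_PROC_NULL;
  int down = (rank < size - 1) ? rank + 1 : MPI_PROC_NULL;
  // send first row up, receive the ghost row below from the next rank
  MPI_Sendrecv(first_row, w, MPI_FLOAT, up, 0,
               bot_ghost, w, MPI_FLOAT, down, 0, comm, MPI_STATUS_IGNORE);
  // send last row down, receive the ghost row above from the previous rank
  MPI_Sendrecv(last_row, w, MPI_FLOAT, down, 1,
               top_ghost, w, MPI_FLOAT, up, 1, comm, MPI_STATUS_IGNORE);
}
```

Because each exchanged row is a single contiguous run of `w` floats, the exchange is one send/receive per neighbor with no packing, which is where the advantage over a block partition comes from.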
However, even if we disable synchronization in MPI (by setting `mpi_sync_interval` greater than the number of iterations), it is still slower than the OpenMP and CUDA backends.
## CUDA

The CUDA backend needs to specify the number of threads in one block it will use.

### Parallelization Strategy

The strategy used in the CUDA backend is quite similar to OpenMP's.
For [EquSolver](https://github.com/Trinkle23897/Fast-Poisson-Image-Editing/blob/main/fpie/core/cuda/equ.cu), it performs equation-level parallelization.
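Equation-level parallelization roughly means one thread per unknown. A simplified kernel, with an assumed per-equation neighbor table rather than the actual fpie data layout, could look like this:

```cuda
// One thread updates one equation: x[i] depends on b[i] and the values of its
// four neighboring unknowns, whose indices are stored in `neighbor`.
__global__ void equ_step(float *x, const float *b, const int *neighbor, int n) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i >= n) return;
  const int *nb = neighbor + 4 * i;
  x[i] = (b[i] + x[nb[0]] + x[nb[1]] + x[nb[2]] + x[nb[3]]) / 4.0f;
}
```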
For [GridSolver](https://github.com/Trinkle23897/Fast-Poisson-Image-Editing/blob/main/fpie/core/cuda/grid.cu), each grid of size `(grid_x, grid_y)` is assigned to one block, and each thread in a block performs the iteration for a single pixel.
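A simplified kernel for this mapping (single channel, assumed names, not the actual fpie kernel); the launch would use a block shape of `dim3(grid_x, grid_y)`:

```cuda
// The CUDA block covers one (grid_x, grid_y) tile of the image and each
// thread handles exactly one pixel of that tile.
__global__ void grid_step(float *x, const float *b, int h, int w) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;  // blockDim.x == grid_x
  int j = blockIdx.y * blockDim.y + threadIdx.y;  // blockDim.y == grid_y
  if (i < 1 || i >= h - 1 || j < 1 || j >= w - 1) return;
  int id = i * w + j;
  x[id] = (b[id] + x[id - w] + x[id + w] + x[id - 1] + x[id + 1]) / 4.0f;
}
```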