diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 41acae098..5ef132ba8 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -154,7 +154,7 @@ The following log levels are used to track code behaviour:
 
 # Developing and contributing
 
-## Development set up
+## Manual development set up
 
 ### 1. Install [**sbt**(>=1.4.7)](https://www.scala-sbt.org/download.html).
@@ -214,6 +214,60 @@ sbt publishM2
+## Automatic development set up (Nix)
+### 1. Install Nix
+To install Nix, follow the instructions on the [Nix website](https://nixos.org/download/); it consists of running a single command in your terminal. If you are using Linux with SELinux enabled, the `single-user` installation method is simpler:
+
+```bash
+# Multi-user installation (the recommended default)
+$ sh <(curl -L https://nixos.org/nix/install) --daemon
+
+# Single-user installation (simpler on Linux with SELinux enabled)
+$ sh <(curl -L https://nixos.org/nix/install) --no-daemon
+```
+
+### 2. Activate the Nix shell
+
+To install all the dependencies and set up the environment, you can use the Nix shell. It provides Spark, sbt and Java 8 (a quick sanity check of the resulting environment is sketched at the end of this section).
+
+```bash
+$ nix-shell # by default it uses bash
+$ nix-shell --command zsh # in case you want to use zsh
+```
+
+### 3. Project packaging
+Navigate to the repository folder and package the project using **sbt**.
+
+```bash
+cd qbeast-spark
+
+sbt assembly
+```
+This generates a fat JAR with the required runtime dependencies shaded.
+
+The JAR does not include Scala, Spark or Delta; it is meant to be used inside an existing Spark installation.
+
+For example:
+```bash
+sbt assembly
+
+$SPARK_HOME/bin/spark-shell \
+--jars ./target/scala-2.12/qbeast-spark-assembly-0.8.0-SNAPSHOT.jar \
+--packages io.delta:delta-spark_2.12:3.1.0 \
+--conf spark.sql.extensions=io.qbeast.sql.QbeastSparkSessionExtension \
+--conf spark.sql.catalog.spark_catalog=io.qbeast.catalog.QbeastCatalog
+```
+
+### 4. Publishing artefacts in the local repository
+Sometimes it is convenient to publish custom versions of the library in a local repository such as Ivy or Maven. For local Ivy (`~/.ivy2`) use
+
+```bash
+sbt publishLocal
+```
+
+For local Maven (`~/.m2`) use
+
+```bash
+sbt publishM2
+```
+
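+Once published, a custom build can be consumed from Spark through the local Ivy cache. A minimal sketch, assuming the artifact is published under the coordinates `io.qbeast:qbeast-spark_2.12:0.8.0-SNAPSHOT` (check `build.sbt` for the actual organization, artifact name and version):
+
+```bash
+$SPARK_HOME/bin/spark-shell \
+--packages io.qbeast:qbeast-spark_2.12:0.8.0-SNAPSHOT,io.delta:delta-spark_2.12:3.1.0 \
+--conf spark.sql.extensions=io.qbeast.sql.QbeastSparkSessionExtension \
+--conf spark.sql.catalog.spark_catalog=io.qbeast.catalog.QbeastCatalog
+```
+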
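+As the sanity check promised in step 2, you can confirm the toolchain provided by the Nix shell from inside it; the expected values below follow from `shell.nix`:
+
+```bash
+$ java -version          # a Zulu build of OpenJDK 1.8 (Java 8)
+$ echo $SPARK_HOME       # a path under /nix/store/
+$ spark-submit --version # Spark 3.5.0, prebuilt for Hadoop 3
+$ sbt --version          # sbt itself runs on the Java 8 home set in conf/sbtopts
+```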
+
+
 ## Developer documentation
 
 You can find the developer documentation (Scala docs) in the [https://docs.qbeast.io/](https://docs.qbeast.io/).
 
@@ -379,4 +433,4 @@ The scale of the Qbeast Spark project can only be achieved through highly reliab
 
 As Pablo Picasso once said:
 
-> **"Action is the foundational key to all success."**
\ No newline at end of file
+> **"Action is the foundational key to all success."**
diff --git a/shell.nix b/shell.nix
new file mode 100644
index 000000000..40d80b3a2
--- /dev/null
+++ b/shell.nix
@@ -0,0 +1,67 @@
+{ pkgs ? import <nixpkgs> {} }:
+
+let
+  # Version, URL and hash of the Spark binary
+  sparkVersion = "3.5.0";
+  sparkUrl = "https://archive.apache.org/dist/spark/spark-${sparkVersion}/spark-${sparkVersion}-bin-hadoop3.tgz";
+  # The hash must match the official one at https://archive.apache.org/dist/spark/spark-3.5.0/spark-3.5.0-bin-hadoop3.tgz.sha512
+  sparkHash = "8883c67e0a138069e597f3e7d4edbbd5c3a565d50b28644aad02856a1ec1da7cb92b8f80454ca427118f69459ea326eaa073cf7b1a860c3b796f4b07c2101319";
+
+  # Derivation for preparing the Spark binary
+  # Warning: it does not include the Java runtime; that is provided separately (zulu8 below)
+  spark = pkgs.stdenv.mkDerivation {
+    pname = "spark";
+    version = sparkVersion;
+
+    # Fetch the tarball
+    src = pkgs.fetchurl {
+      url = sparkUrl;
+      sha512 = sparkHash;
+    };
+    # Unpack the tarball into the output; it will be located under /nix/store/...
+    installPhase = ''
+      mkdir -p $out
+      tar -xzf $src --strip-components=1 -C $out
+    '';
+    # Metadata of the derivation, not relevant for the build itself
+    meta = {
+      description = "Apache Spark ${sparkVersion} with prebuilt Hadoop3 binaries";
+      license = pkgs.lib.licenses.asl20;
+      homepage = "https://spark.apache.org";
+    };
+  };
+
+  # Override the sbt package to set the Java home to zulu8
+  # By default the package uses OpenJDK 21
+  # Check https://github.com/NixOS/nixpkgs/blob/nixos-24.11/pkgs/development/compilers/zulu/common.nix
+  sbt = pkgs.sbt.overrideAttrs (oldAttrs: {
+    postPatch = ''
+      echo -java-home ${pkgs.zulu8.jre} >> conf/sbtopts
+    '';
+  });
+in
+
+# Define the development shell that includes Spark, sbt and the Zulu8 JDK
+pkgs.mkShell {
+  packages = [
+    # The Zulu8 JDK required by Spark and sbt
+    # Packages come from nixpkgs (https://search.nixos.org/packages)
+    # JAVA_HOME will be set to /nix/store/.../zulu8 automatically
+    pkgs.zulu8
+    # sbt with the override above to set the Java home
+    sbt
+    # Spark binary fetched from the official Apache archive
+    spark
+  ];
+
+  # Configure the environment variables
+  SPARK_HOME = "${spark.out}";
+
+  # Script executed when the shell starts
+  shellHook = ''
+    echo "Your development environment for qbeast is ready, happy coding!"
+    echo "Try 'spark-shell' or 'sbt test' to start."
+  '';
+}
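+
+# Note: `import <nixpkgs> {}` above follows whatever nixpkgs channel is active
+# on the machine, so package versions may drift over time. One possible way to
+# pin nixpkgs for a more reproducible shell (the branch tarball below is only
+# an example; any fixed revision URL works):
+#   { pkgs ? import (fetchTarball
+#       "https://github.com/NixOS/nixpkgs/archive/nixos-24.11.tar.gz") {} }: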