diff --git a/README.md b/README.md index 97d7083f..aba69946 100644 --- a/README.md +++ b/README.md @@ -205,7 +205,7 @@ opendataloader-pdf path/to/document.pdf path/to/folder -o path/to/output -f json ## Java -For various example templates, including Gradle and Maven, please refer to https://github.com/opendataloader-project/opendataloader-pdf/tree/main/examples/java. +For various example templates, including Gradle and Maven, please refer to [Examples](https://github.com/opendataloader-project/opendataloader-pdf-examples). ### Dependency @@ -221,7 +221,7 @@ Check for the latest version on [Maven Central](https://search.maven.org/artifac org.opendataloader opendataloader-pdf-core - 1.0.0 + 1.1.2 diff --git a/examples/java/README.md b/examples/java/README.md deleted file mode 100644 index 4b1b19d1..00000000 --- a/examples/java/README.md +++ /dev/null @@ -1,100 +0,0 @@ -# OpenDataLoader PDF - Java Sample Applications - -This directory contains sample applications demonstrating how to use the `opendataloader-pdf-core` library. These examples are designed for beginners to get started easily. - -## System Requirements - -To run these examples, you will need the following software installed on your system: - -1. **Java Development Kit (JDK) - Version 11 or higher:** - * The JDK is required to compile and run Java applications. - * We recommend installing a free version from [Eclipse Temurin (Adoptium)](https://adoptium.net/). - -2. **Apache Maven (for the Maven example only):** - * Maven is a build tool used for the `maven-example`. - * You can download it and find installation instructions on the [Maven website](https://maven.apache.org/download.cgi). - -**Note for Gradle Examples:** You do **not** need to install Gradle separately. The `gradle-groovy-example` and `gradle-kotlin-example` projects include a "Gradle Wrapper" (`gradlew` or `gradlew.bat`). When you run a command with the wrapper, it will automatically download and use the correct Gradle version for the project. - ---- - -## 1. Maven Example - -This sample shows how to use the library in a standard Java project built with Maven. The example is pre-configured to process a sample PDF file included in this repository. - -### Instructions - -1. **Navigate to the directory:** - Open a terminal or command prompt and navigate to the `maven-example` directory. - ```shell - cd maven-example - ``` - -2. **Build and Run the Application:** - Run the following Maven command. This command will download dependencies, compile the code, and execute the application in one step. - ```shell - mvn clean install exec:java - ``` - -3. **Verify the Output:** - After the command finishes, you will see success messages in your console. The output files (JSON, Markdown, and an annotated PDF) will be generated in the `maven-example/target` directory. - - You can inspect the generated files to see the results of the PDF processing. - ---- - -## 2. Gradle (Groovy DSL) Example - -This sample uses Gradle with the traditional Groovy DSL for its build script (`build.gradle`). The example is configured to use the `opendataloader-pdf-core` library to process a sample PDF file from the `samples/pdf` directory and save the results. - -### Instructions - -1. **Navigate to the directory:** - Open a terminal or command prompt and navigate to the `gradle-groovy-example` directory: - ```shell - cd gradle-groovy-example - ``` - -2. **Build and Run the Application:** - Run the application using the Gradle Wrapper. This single command will download dependencies, compile the code, and execute the main method. - - **On Linux/macOS:** - ```shell - ./gradlew run - ``` - **On Windows:** - ```shell - gradlew.bat run - ``` - -3. **Verify the Output:** - After the command finishes, you will see success messages in your console. The output files (JSON, Markdown, and an annotated PDF) will be generated in the `gradle-groovy-example/build` directory. - ---- - -## 3. Gradle (Kotlin DSL) Example - -This sample uses Gradle with the modern Kotlin DSL for its build script (`build.gradle.kts`). The example is configured to use the `opendataloader-pdf-core` library to process a sample PDF file from the `samples/pdf` directory and save the results. - -### Instructions - -1. **Navigate to the directory:** - Open a terminal or command prompt and navigate to the `gradle-kotlin-example` directory: - ```shell - cd gradle-kotlin-example - ``` - -2. **Build and Run the Application:** - Run the application using the Gradle Wrapper. This single command will download dependencies, compile the code, and execute the main method. - - **On Linux/macOS:** - ```shell - ./gradlew run - ``` - **On Windows:** - ```shell - gradlew.bat run - ``` - -3. **Verify the Output:** - After the command finishes, you will see success messages in your console. The output files (JSON, Markdown, and an annotated PDF) will be generated in the `gradle-kotlin-example/build` directory. diff --git a/examples/java/gradle-groovy-example/.gitignore b/examples/java/gradle-groovy-example/.gitignore deleted file mode 100644 index b1dff0dd..00000000 --- a/examples/java/gradle-groovy-example/.gitignore +++ /dev/null @@ -1,45 +0,0 @@ -.gradle -build/ -!gradle/wrapper/gradle-wrapper.jar -!**/src/main/**/build/ -!**/src/test/**/build/ - -### IntelliJ IDEA ### -.idea/modules.xml -.idea/jarRepositories.xml -.idea/compiler.xml -.idea/libraries/ -*.iws -*.iml -*.ipr -out/ -!**/src/main/**/out/ -!**/src/test/**/out/ - -### Kotlin ### -.kotlin - -### Eclipse ### -.apt_generated -.classpath -.factorypath -.project -.settings -.springBeans -.sts4-cache -bin/ -!**/src/main/**/bin/ -!**/src/test/**/bin/ - -### NetBeans ### -/nbproject/private/ -/nbbuild/ -/dist/ -/nbdist/ -/.nb-gradle/ - -### VS Code ### -.vscode/ - -### Mac OS ### -.DS_Store \ No newline at end of file diff --git a/examples/java/gradle-groovy-example/build.gradle b/examples/java/gradle-groovy-example/build.gradle deleted file mode 100644 index bfce6749..00000000 --- a/examples/java/gradle-groovy-example/build.gradle +++ /dev/null @@ -1,32 +0,0 @@ -plugins { - id 'java' - id 'application' -} - -application { - mainClass = 'org.example.gradle.Main' -} - -group = 'org.example.gradle' -version = '1.0-SNAPSHOT' - -repositories { - mavenCentral() - maven { - url 'https://artifactory.openpreservation.org/artifactory/vera-dev' - } -} - -dependencies { - implementation 'org.opendataloader:opendataloader-pdf-core:1.0.0' - testImplementation platform('org.junit:junit-bom:5.10.0') - testImplementation 'org.junit.jupiter:junit-jupiter' -} - -test { - useJUnitPlatform() -} - -tasks.withType(JavaCompile) { - options.encoding = 'UTF-8' -} diff --git a/examples/java/gradle-groovy-example/gradle/wrapper/gradle-wrapper.jar b/examples/java/gradle-groovy-example/gradle/wrapper/gradle-wrapper.jar deleted file mode 100644 index 9bbc975c..00000000 Binary files a/examples/java/gradle-groovy-example/gradle/wrapper/gradle-wrapper.jar and /dev/null differ diff --git a/examples/java/gradle-groovy-example/gradle/wrapper/gradle-wrapper.properties b/examples/java/gradle-groovy-example/gradle/wrapper/gradle-wrapper.properties deleted file mode 100644 index 37f853b1..00000000 --- a/examples/java/gradle-groovy-example/gradle/wrapper/gradle-wrapper.properties +++ /dev/null @@ -1,7 +0,0 @@ -distributionBase=GRADLE_USER_HOME -distributionPath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-8.13-bin.zip -networkTimeout=10000 -validateDistributionUrl=true -zipStoreBase=GRADLE_USER_HOME -zipStorePath=wrapper/dists diff --git a/examples/java/gradle-groovy-example/gradlew b/examples/java/gradle-groovy-example/gradlew deleted file mode 100755 index faf93008..00000000 --- a/examples/java/gradle-groovy-example/gradlew +++ /dev/null @@ -1,251 +0,0 @@ -#!/bin/sh - -# -# Copyright © 2015-2021 the original authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 -# - -############################################################################## -# -# Gradle start up script for POSIX generated by Gradle. -# -# Important for running: -# -# (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is -# noncompliant, but you have some other compliant shell such as ksh or -# bash, then to run this script, type that shell name before the whole -# command line, like: -# -# ksh Gradle -# -# Busybox and similar reduced shells will NOT work, because this script -# requires all of these POSIX shell features: -# * functions; -# * expansions «$var», «${var}», «${var:-default}», «${var+SET}», -# «${var#prefix}», «${var%suffix}», and «$( cmd )»; -# * compound commands having a testable exit status, especially «case»; -# * various built-in commands including «command», «set», and «ulimit». -# -# Important for patching: -# -# (2) This script targets any POSIX shell, so it avoids extensions provided -# by Bash, Ksh, etc; in particular arrays are avoided. -# -# The "traditional" practice of packing multiple parameters into a -# space-separated string is a well documented source of bugs and security -# problems, so this is (mostly) avoided, by progressively accumulating -# options in "$@", and eventually passing that to Java. -# -# Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS, -# and GRADLE_OPTS) rely on word-splitting, this is performed explicitly; -# see the in-line comments for details. -# -# There are tweaks for specific operating systems such as AIX, CygWin, -# Darwin, MinGW, and NonStop. -# -# (3) This script is generated from the Groovy template -# https://github.com/gradle/gradle/blob/HEAD/platforms/jvm/plugins-application/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt -# within the Gradle project. -# -# You can find Gradle at https://github.com/gradle/gradle/. -# -############################################################################## - -# Attempt to set APP_HOME - -# Resolve links: $0 may be a link -app_path=$0 - -# Need this for daisy-chained symlinks. -while - APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path - [ -h "$app_path" ] -do - ls=$( ls -ld "$app_path" ) - link=${ls#*' -> '} - case $link in #( - /*) app_path=$link ;; #( - *) app_path=$APP_HOME$link ;; - esac -done - -# This is normally unused -# shellcheck disable=SC2034 -APP_BASE_NAME=${0##*/} -# Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036) -APP_HOME=$( cd -P "${APP_HOME:-./}" > /dev/null && printf '%s\n' "$PWD" ) || exit - -# Use the maximum available, or set MAX_FD != -1 to use that value. -MAX_FD=maximum - -warn () { - echo "$*" -} >&2 - -die () { - echo - echo "$*" - echo - exit 1 -} >&2 - -# OS specific support (must be 'true' or 'false'). -cygwin=false -msys=false -darwin=false -nonstop=false -case "$( uname )" in #( - CYGWIN* ) cygwin=true ;; #( - Darwin* ) darwin=true ;; #( - MSYS* | MINGW* ) msys=true ;; #( - NONSTOP* ) nonstop=true ;; -esac - -CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar - - -# Determine the Java command to use to start the JVM. -if [ -n "$JAVA_HOME" ] ; then - if [ -x "$JAVA_HOME/jre/sh/java" ] ; then - # IBM's JDK on AIX uses strange locations for the executables - JAVACMD=$JAVA_HOME/jre/sh/java - else - JAVACMD=$JAVA_HOME/bin/java - fi - if [ ! -x "$JAVACMD" ] ; then - die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME - -Please set the JAVA_HOME variable in your environment to match the -location of your Java installation." - fi -else - JAVACMD=java - if ! command -v java >/dev/null 2>&1 - then - die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. - -Please set the JAVA_HOME variable in your environment to match the -location of your Java installation." - fi -fi - -# Increase the maximum file descriptors if we can. -if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then - case $MAX_FD in #( - max*) - # In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked. - # shellcheck disable=SC2039,SC3045 - MAX_FD=$( ulimit -H -n ) || - warn "Could not query maximum file descriptor limit" - esac - case $MAX_FD in #( - '' | soft) :;; #( - *) - # In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked. - # shellcheck disable=SC2039,SC3045 - ulimit -n "$MAX_FD" || - warn "Could not set maximum file descriptor limit to $MAX_FD" - esac -fi - -# Collect all arguments for the java command, stacking in reverse order: -# * args from the command line -# * the main class name -# * -classpath -# * -D...appname settings -# * --module-path (only if needed) -# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables. - -# For Cygwin or MSYS, switch paths to Windows format before running java -if "$cygwin" || "$msys" ; then - APP_HOME=$( cygpath --path --mixed "$APP_HOME" ) - CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" ) - - JAVACMD=$( cygpath --unix "$JAVACMD" ) - - # Now convert the arguments - kludge to limit ourselves to /bin/sh - for arg do - if - case $arg in #( - -*) false ;; # don't mess with options #( - /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath - [ -e "$t" ] ;; #( - *) false ;; - esac - then - arg=$( cygpath --path --ignore --mixed "$arg" ) - fi - # Roll the args list around exactly as many times as the number of - # args, so each arg winds up back in the position where it started, but - # possibly modified. - # - # NB: a `for` loop captures its iteration list before it begins, so - # changing the positional parameters here affects neither the number of - # iterations, nor the values presented in `arg`. - shift # remove old arg - set -- "$@" "$arg" # push replacement arg - done -fi - - -# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. -DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' - -# Collect all arguments for the java command: -# * DEFAULT_JVM_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments, -# and any embedded shellness will be escaped. -# * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be -# treated as '${Hostname}' itself on the command line. - -set -- \ - "-Dorg.gradle.appname=$APP_BASE_NAME" \ - -classpath "$CLASSPATH" \ - org.gradle.wrapper.GradleWrapperMain \ - "$@" - -# Stop when "xargs" is not available. -if ! command -v xargs >/dev/null 2>&1 -then - die "xargs is not available" -fi - -# Use "xargs" to parse quoted args. -# -# With -n1 it outputs one arg per line, with the quotes and backslashes removed. -# -# In Bash we could simply go: -# -# readarray ARGS < <( xargs -n1 <<<"$var" ) && -# set -- "${ARGS[@]}" "$@" -# -# but POSIX shell has neither arrays nor command substitution, so instead we -# post-process each arg (as a line of input to sed) to backslash-escape any -# character that might be a shell metacharacter, then use eval to reverse -# that process (while maintaining the separation between arguments), and wrap -# the whole thing up as a single "set" statement. -# -# This will of course break if any of these variables contains a newline or -# an unmatched quote. -# - -eval "set -- $( - printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" | - xargs -n1 | - sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' | - tr '\n' ' ' - )" '"$@"' - -exec "$JAVACMD" "$@" diff --git a/examples/java/gradle-groovy-example/gradlew.bat b/examples/java/gradle-groovy-example/gradlew.bat deleted file mode 100644 index 9d21a218..00000000 --- a/examples/java/gradle-groovy-example/gradlew.bat +++ /dev/null @@ -1,94 +0,0 @@ -@rem -@rem Copyright 2015 the original author or authors. -@rem -@rem Licensed under the Apache License, Version 2.0 (the "License"); -@rem you may not use this file except in compliance with the License. -@rem You may obtain a copy of the License at -@rem -@rem https://www.apache.org/licenses/LICENSE-2.0 -@rem -@rem Unless required by applicable law or agreed to in writing, software -@rem distributed under the License is distributed on an "AS IS" BASIS, -@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -@rem See the License for the specific language governing permissions and -@rem limitations under the License. -@rem -@rem SPDX-License-Identifier: Apache-2.0 -@rem - -@if "%DEBUG%"=="" @echo off -@rem ########################################################################## -@rem -@rem Gradle startup script for Windows -@rem -@rem ########################################################################## - -@rem Set local scope for the variables with windows NT shell -if "%OS%"=="Windows_NT" setlocal - -set DIRNAME=%~dp0 -if "%DIRNAME%"=="" set DIRNAME=. -@rem This is normally unused -set APP_BASE_NAME=%~n0 -set APP_HOME=%DIRNAME% - -@rem Resolve any "." and ".." in APP_HOME to make it shorter. -for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi - -@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. -set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" - -@rem Find java.exe -if defined JAVA_HOME goto findJavaFromJavaHome - -set JAVA_EXE=java.exe -%JAVA_EXE% -version >NUL 2>&1 -if %ERRORLEVEL% equ 0 goto execute - -echo. 1>&2 -echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 1>&2 -echo. 1>&2 -echo Please set the JAVA_HOME variable in your environment to match the 1>&2 -echo location of your Java installation. 1>&2 - -goto fail - -:findJavaFromJavaHome -set JAVA_HOME=%JAVA_HOME:"=% -set JAVA_EXE=%JAVA_HOME%/bin/java.exe - -if exist "%JAVA_EXE%" goto execute - -echo. 1>&2 -echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 1>&2 -echo. 1>&2 -echo Please set the JAVA_HOME variable in your environment to match the 1>&2 -echo location of your Java installation. 1>&2 - -goto fail - -:execute -@rem Setup the command line - -set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar - - -@rem Execute Gradle -"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* - -:end -@rem End local scope for the variables with windows NT shell -if %ERRORLEVEL% equ 0 goto mainEnd - -:fail -rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of -rem the _cmd.exe /c_ return code! -set EXIT_CODE=%ERRORLEVEL% -if %EXIT_CODE% equ 0 set EXIT_CODE=1 -if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE% -exit /b %EXIT_CODE% - -:mainEnd -if "%OS%"=="Windows_NT" endlocal - -:omega diff --git a/examples/java/gradle-groovy-example/settings.gradle b/examples/java/gradle-groovy-example/settings.gradle deleted file mode 100644 index b2889f4c..00000000 --- a/examples/java/gradle-groovy-example/settings.gradle +++ /dev/null @@ -1 +0,0 @@ -rootProject.name = 'gradle-groovy-example' \ No newline at end of file diff --git a/examples/java/gradle-groovy-example/src/main/java/org/example/gradle/Main.java b/examples/java/gradle-groovy-example/src/main/java/org/example/gradle/Main.java deleted file mode 100644 index 1dfdb992..00000000 --- a/examples/java/gradle-groovy-example/src/main/java/org/example/gradle/Main.java +++ /dev/null @@ -1,55 +0,0 @@ -package org.example.gradle; - -import org.opendataloader.pdf.api.Config; -import org.opendataloader.pdf.api.OpenDataLoaderPDF; - -import java.io.File; -import java.io.IOException; -import java.nio.file.Paths; - -public class Main { - public static void main(String[] args) { - // This example shows how to process a PDF file using the opendataloader-pdf-core library. - // It uses a sample PDF file located in the project root. - // The PDF file path can be changed as needed. - - // Set the relative path to the input PDF file based on the project's root directory. - // Since this class is run from the 'examples/java/gradle-groovy-example' directory, we need to move up to the parent directory. - String pdfFilePath = Paths.get("..", "..", "..", "samples", "pdf", "2408.02509v1.pdf").toString(); - File pdfFile = new File(pdfFilePath); - - if (!pdfFile.exists()) { - System.err.println("PDF file does not exist: " + pdfFile.getAbsolutePath()); - System.exit(1); - } - - // Set the output directory to the 'build' folder for Gradle. - File outputDir = new File("build"); - if (!outputDir.exists()) { - outputDir.mkdirs(); - } - - try { - System.out.println("Processing PDF file: " + pdfFile.getAbsolutePath()); - - // Configure the library settings. - Config config = new Config(); - // Set the output folder to the 'build' directory. - config.setOutputFolder(outputDir.getAbsolutePath()); - // Set to generate JSON, Markdown, and annotated PDF results. - config.setGenerateJSON(true); - config.setGenerateMarkdown(true); - config.setGeneratePDF(true); - - // Process the PDF file using OpenDataLoaderPDF. - OpenDataLoaderPDF.processFile(pdfFile.getAbsolutePath(), config); - - System.out.println("Processing finished successfully."); - System.out.println("Check the output files in: " + outputDir.getAbsolutePath()); - - } catch (IOException e) { - e.printStackTrace(); - System.exit(1); - } - } -} diff --git a/examples/java/gradle-kotlin-example/.gitignore b/examples/java/gradle-kotlin-example/.gitignore deleted file mode 100644 index b1dff0dd..00000000 --- a/examples/java/gradle-kotlin-example/.gitignore +++ /dev/null @@ -1,45 +0,0 @@ -.gradle -build/ -!gradle/wrapper/gradle-wrapper.jar -!**/src/main/**/build/ -!**/src/test/**/build/ - -### IntelliJ IDEA ### -.idea/modules.xml -.idea/jarRepositories.xml -.idea/compiler.xml -.idea/libraries/ -*.iws -*.iml -*.ipr -out/ -!**/src/main/**/out/ -!**/src/test/**/out/ - -### Kotlin ### -.kotlin - -### Eclipse ### -.apt_generated -.classpath -.factorypath -.project -.settings -.springBeans -.sts4-cache -bin/ -!**/src/main/**/bin/ -!**/src/test/**/bin/ - -### NetBeans ### -/nbproject/private/ -/nbbuild/ -/dist/ -/nbdist/ -/.nb-gradle/ - -### VS Code ### -.vscode/ - -### Mac OS ### -.DS_Store \ No newline at end of file diff --git a/examples/java/gradle-kotlin-example/build.gradle.kts b/examples/java/gradle-kotlin-example/build.gradle.kts deleted file mode 100644 index 5de53af5..00000000 --- a/examples/java/gradle-kotlin-example/build.gradle.kts +++ /dev/null @@ -1,32 +0,0 @@ -import org.jetbrains.kotlin.gradle.tasks.KotlinCompile - -plugins { - kotlin("jvm") version "1.9.23" - application -} - -application { - mainClass.set("org.example.gradlekt.MainKt") -} - -group = "org.example.gradlekt" -version = "1.0-SNAPSHOT" - -repositories { - mavenCentral() - maven("https://artifactory.openpreservation.org/artifactory/vera-dev") -} - -dependencies { - implementation("org.opendataloader:opendataloader-pdf-core:1.0.0") - testImplementation(platform("org.junit:junit-bom:5.10.0")) - testImplementation("org.junit.jupiter:junit-jupiter") -} - -tasks.test { - useJUnitPlatform() -} - -tasks.withType { - options.encoding = "UTF-8" -} diff --git a/examples/java/gradle-kotlin-example/gradle/wrapper/gradle-wrapper.jar b/examples/java/gradle-kotlin-example/gradle/wrapper/gradle-wrapper.jar deleted file mode 100644 index 9bbc975c..00000000 Binary files a/examples/java/gradle-kotlin-example/gradle/wrapper/gradle-wrapper.jar and /dev/null differ diff --git a/examples/java/gradle-kotlin-example/gradle/wrapper/gradle-wrapper.properties b/examples/java/gradle-kotlin-example/gradle/wrapper/gradle-wrapper.properties deleted file mode 100644 index 37f853b1..00000000 --- a/examples/java/gradle-kotlin-example/gradle/wrapper/gradle-wrapper.properties +++ /dev/null @@ -1,7 +0,0 @@ -distributionBase=GRADLE_USER_HOME -distributionPath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-8.13-bin.zip -networkTimeout=10000 -validateDistributionUrl=true -zipStoreBase=GRADLE_USER_HOME -zipStorePath=wrapper/dists diff --git a/examples/java/gradle-kotlin-example/gradlew b/examples/java/gradle-kotlin-example/gradlew deleted file mode 100755 index faf93008..00000000 --- a/examples/java/gradle-kotlin-example/gradlew +++ /dev/null @@ -1,251 +0,0 @@ -#!/bin/sh - -# -# Copyright © 2015-2021 the original authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 -# - -############################################################################## -# -# Gradle start up script for POSIX generated by Gradle. -# -# Important for running: -# -# (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is -# noncompliant, but you have some other compliant shell such as ksh or -# bash, then to run this script, type that shell name before the whole -# command line, like: -# -# ksh Gradle -# -# Busybox and similar reduced shells will NOT work, because this script -# requires all of these POSIX shell features: -# * functions; -# * expansions «$var», «${var}», «${var:-default}», «${var+SET}», -# «${var#prefix}», «${var%suffix}», and «$( cmd )»; -# * compound commands having a testable exit status, especially «case»; -# * various built-in commands including «command», «set», and «ulimit». -# -# Important for patching: -# -# (2) This script targets any POSIX shell, so it avoids extensions provided -# by Bash, Ksh, etc; in particular arrays are avoided. -# -# The "traditional" practice of packing multiple parameters into a -# space-separated string is a well documented source of bugs and security -# problems, so this is (mostly) avoided, by progressively accumulating -# options in "$@", and eventually passing that to Java. -# -# Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS, -# and GRADLE_OPTS) rely on word-splitting, this is performed explicitly; -# see the in-line comments for details. -# -# There are tweaks for specific operating systems such as AIX, CygWin, -# Darwin, MinGW, and NonStop. -# -# (3) This script is generated from the Groovy template -# https://github.com/gradle/gradle/blob/HEAD/platforms/jvm/plugins-application/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt -# within the Gradle project. -# -# You can find Gradle at https://github.com/gradle/gradle/. -# -############################################################################## - -# Attempt to set APP_HOME - -# Resolve links: $0 may be a link -app_path=$0 - -# Need this for daisy-chained symlinks. -while - APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path - [ -h "$app_path" ] -do - ls=$( ls -ld "$app_path" ) - link=${ls#*' -> '} - case $link in #( - /*) app_path=$link ;; #( - *) app_path=$APP_HOME$link ;; - esac -done - -# This is normally unused -# shellcheck disable=SC2034 -APP_BASE_NAME=${0##*/} -# Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036) -APP_HOME=$( cd -P "${APP_HOME:-./}" > /dev/null && printf '%s\n' "$PWD" ) || exit - -# Use the maximum available, or set MAX_FD != -1 to use that value. -MAX_FD=maximum - -warn () { - echo "$*" -} >&2 - -die () { - echo - echo "$*" - echo - exit 1 -} >&2 - -# OS specific support (must be 'true' or 'false'). -cygwin=false -msys=false -darwin=false -nonstop=false -case "$( uname )" in #( - CYGWIN* ) cygwin=true ;; #( - Darwin* ) darwin=true ;; #( - MSYS* | MINGW* ) msys=true ;; #( - NONSTOP* ) nonstop=true ;; -esac - -CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar - - -# Determine the Java command to use to start the JVM. -if [ -n "$JAVA_HOME" ] ; then - if [ -x "$JAVA_HOME/jre/sh/java" ] ; then - # IBM's JDK on AIX uses strange locations for the executables - JAVACMD=$JAVA_HOME/jre/sh/java - else - JAVACMD=$JAVA_HOME/bin/java - fi - if [ ! -x "$JAVACMD" ] ; then - die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME - -Please set the JAVA_HOME variable in your environment to match the -location of your Java installation." - fi -else - JAVACMD=java - if ! command -v java >/dev/null 2>&1 - then - die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. - -Please set the JAVA_HOME variable in your environment to match the -location of your Java installation." - fi -fi - -# Increase the maximum file descriptors if we can. -if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then - case $MAX_FD in #( - max*) - # In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked. - # shellcheck disable=SC2039,SC3045 - MAX_FD=$( ulimit -H -n ) || - warn "Could not query maximum file descriptor limit" - esac - case $MAX_FD in #( - '' | soft) :;; #( - *) - # In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked. - # shellcheck disable=SC2039,SC3045 - ulimit -n "$MAX_FD" || - warn "Could not set maximum file descriptor limit to $MAX_FD" - esac -fi - -# Collect all arguments for the java command, stacking in reverse order: -# * args from the command line -# * the main class name -# * -classpath -# * -D...appname settings -# * --module-path (only if needed) -# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables. - -# For Cygwin or MSYS, switch paths to Windows format before running java -if "$cygwin" || "$msys" ; then - APP_HOME=$( cygpath --path --mixed "$APP_HOME" ) - CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" ) - - JAVACMD=$( cygpath --unix "$JAVACMD" ) - - # Now convert the arguments - kludge to limit ourselves to /bin/sh - for arg do - if - case $arg in #( - -*) false ;; # don't mess with options #( - /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath - [ -e "$t" ] ;; #( - *) false ;; - esac - then - arg=$( cygpath --path --ignore --mixed "$arg" ) - fi - # Roll the args list around exactly as many times as the number of - # args, so each arg winds up back in the position where it started, but - # possibly modified. - # - # NB: a `for` loop captures its iteration list before it begins, so - # changing the positional parameters here affects neither the number of - # iterations, nor the values presented in `arg`. - shift # remove old arg - set -- "$@" "$arg" # push replacement arg - done -fi - - -# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. -DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' - -# Collect all arguments for the java command: -# * DEFAULT_JVM_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments, -# and any embedded shellness will be escaped. -# * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be -# treated as '${Hostname}' itself on the command line. - -set -- \ - "-Dorg.gradle.appname=$APP_BASE_NAME" \ - -classpath "$CLASSPATH" \ - org.gradle.wrapper.GradleWrapperMain \ - "$@" - -# Stop when "xargs" is not available. -if ! command -v xargs >/dev/null 2>&1 -then - die "xargs is not available" -fi - -# Use "xargs" to parse quoted args. -# -# With -n1 it outputs one arg per line, with the quotes and backslashes removed. -# -# In Bash we could simply go: -# -# readarray ARGS < <( xargs -n1 <<<"$var" ) && -# set -- "${ARGS[@]}" "$@" -# -# but POSIX shell has neither arrays nor command substitution, so instead we -# post-process each arg (as a line of input to sed) to backslash-escape any -# character that might be a shell metacharacter, then use eval to reverse -# that process (while maintaining the separation between arguments), and wrap -# the whole thing up as a single "set" statement. -# -# This will of course break if any of these variables contains a newline or -# an unmatched quote. -# - -eval "set -- $( - printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" | - xargs -n1 | - sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' | - tr '\n' ' ' - )" '"$@"' - -exec "$JAVACMD" "$@" diff --git a/examples/java/gradle-kotlin-example/gradlew.bat b/examples/java/gradle-kotlin-example/gradlew.bat deleted file mode 100644 index 9d21a218..00000000 --- a/examples/java/gradle-kotlin-example/gradlew.bat +++ /dev/null @@ -1,94 +0,0 @@ -@rem -@rem Copyright 2015 the original author or authors. -@rem -@rem Licensed under the Apache License, Version 2.0 (the "License"); -@rem you may not use this file except in compliance with the License. -@rem You may obtain a copy of the License at -@rem -@rem https://www.apache.org/licenses/LICENSE-2.0 -@rem -@rem Unless required by applicable law or agreed to in writing, software -@rem distributed under the License is distributed on an "AS IS" BASIS, -@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -@rem See the License for the specific language governing permissions and -@rem limitations under the License. -@rem -@rem SPDX-License-Identifier: Apache-2.0 -@rem - -@if "%DEBUG%"=="" @echo off -@rem ########################################################################## -@rem -@rem Gradle startup script for Windows -@rem -@rem ########################################################################## - -@rem Set local scope for the variables with windows NT shell -if "%OS%"=="Windows_NT" setlocal - -set DIRNAME=%~dp0 -if "%DIRNAME%"=="" set DIRNAME=. -@rem This is normally unused -set APP_BASE_NAME=%~n0 -set APP_HOME=%DIRNAME% - -@rem Resolve any "." and ".." in APP_HOME to make it shorter. -for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi - -@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. -set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" - -@rem Find java.exe -if defined JAVA_HOME goto findJavaFromJavaHome - -set JAVA_EXE=java.exe -%JAVA_EXE% -version >NUL 2>&1 -if %ERRORLEVEL% equ 0 goto execute - -echo. 1>&2 -echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 1>&2 -echo. 1>&2 -echo Please set the JAVA_HOME variable in your environment to match the 1>&2 -echo location of your Java installation. 1>&2 - -goto fail - -:findJavaFromJavaHome -set JAVA_HOME=%JAVA_HOME:"=% -set JAVA_EXE=%JAVA_HOME%/bin/java.exe - -if exist "%JAVA_EXE%" goto execute - -echo. 1>&2 -echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 1>&2 -echo. 1>&2 -echo Please set the JAVA_HOME variable in your environment to match the 1>&2 -echo location of your Java installation. 1>&2 - -goto fail - -:execute -@rem Setup the command line - -set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar - - -@rem Execute Gradle -"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* - -:end -@rem End local scope for the variables with windows NT shell -if %ERRORLEVEL% equ 0 goto mainEnd - -:fail -rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of -rem the _cmd.exe /c_ return code! -set EXIT_CODE=%ERRORLEVEL% -if %EXIT_CODE% equ 0 set EXIT_CODE=1 -if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE% -exit /b %EXIT_CODE% - -:mainEnd -if "%OS%"=="Windows_NT" endlocal - -:omega diff --git a/examples/java/gradle-kotlin-example/settings.gradle.kts b/examples/java/gradle-kotlin-example/settings.gradle.kts deleted file mode 100644 index d1edd6ee..00000000 --- a/examples/java/gradle-kotlin-example/settings.gradle.kts +++ /dev/null @@ -1 +0,0 @@ -rootProject.name = "gradle-kotlin-example" \ No newline at end of file diff --git a/examples/java/gradle-kotlin-example/src/main/kotlin/org/example/gradlekt/Main.kt b/examples/java/gradle-kotlin-example/src/main/kotlin/org/example/gradlekt/Main.kt deleted file mode 100644 index a284acab..00000000 --- a/examples/java/gradle-kotlin-example/src/main/kotlin/org/example/gradlekt/Main.kt +++ /dev/null @@ -1,52 +0,0 @@ -package org.example.gradlekt - -import org.opendataloader.pdf.api.Config -import org.opendataloader.pdf.api.OpenDataLoaderPDF -import java.io.File -import java.io.IOException -import java.nio.file.Paths -import kotlin.system.exitProcess - -fun main() { - // This example shows how to process a PDF file using the opendataloader-pdf-core library. - // It uses a sample PDF file located in the project root. - // The PDF file path can be changed as needed. - - // Set the relative path to the input PDF file based on the project's root directory. - val pdfFilePath = Paths.get("..", "..", "..", "samples", "pdf", "2408.02509v1.pdf").toString() - val pdfFile = File(pdfFilePath) - - if (!pdfFile.exists()) { - System.err.println("PDF file does not exist: " + pdfFile.absolutePath) - System.exit(1) - } - - // Set the output directory to the 'build' folder for Gradle. - val outputDir = File("build") - if (!outputDir.exists()) { - outputDir.mkdirs() - } - - try { - println("Processing PDF file: " + pdfFile.absolutePath) - - // Configure the library settings. - val config = Config() - // Set the output folder to the 'build' directory. - config.outputFolder = outputDir.absolutePath - // Set to generate JSON, Markdown, and annotated PDF results. - config.isGenerateJSON = true - config.isGenerateMarkdown = true - config.isGeneratePDF = true - - // Process the PDF file using OpenDataLoaderPDF. - OpenDataLoaderPDF.processFile(pdfFile.absolutePath, config) - - println("Processing finished successfully.") - println("Check the output files in: " + outputDir.absolutePath) - - } catch (e: IOException) { - e.printStackTrace() - exitProcess(1) - } -} diff --git a/examples/java/maven-example/.gitignore b/examples/java/maven-example/.gitignore deleted file mode 100644 index 5ff6309b..00000000 --- a/examples/java/maven-example/.gitignore +++ /dev/null @@ -1,38 +0,0 @@ -target/ -!.mvn/wrapper/maven-wrapper.jar -!**/src/main/**/target/ -!**/src/test/**/target/ - -### IntelliJ IDEA ### -.idea/modules.xml -.idea/jarRepositories.xml -.idea/compiler.xml -.idea/libraries/ -*.iws -*.iml -*.ipr - -### Eclipse ### -.apt_generated -.classpath -.factorypath -.project -.settings -.springBeans -.sts4-cache - -### NetBeans ### -/nbproject/private/ -/nbbuild/ -/dist/ -/nbdist/ -/.nb-gradle/ -build/ -!**/src/main/**/build/ -!**/src/test/**/build/ - -### VS Code ### -.vscode/ - -### Mac OS ### -.DS_Store \ No newline at end of file diff --git a/examples/java/maven-example/pom.xml b/examples/java/maven-example/pom.xml deleted file mode 100644 index 4d458acf..00000000 --- a/examples/java/maven-example/pom.xml +++ /dev/null @@ -1,59 +0,0 @@ - - - 4.0.0 - - org.example.maven - maven-example - 1.0-SNAPSHOT - - - 11 - 11 - UTF-8 - - - - - org.opendataloader - opendataloader-pdf-core - 1.0.0 - - - - - - - org.codehaus.mojo - exec-maven-plugin - 3.1.0 - - org.example.maven.Main - - - - - - - - - true - - vera-dev - Vera development - https://artifactory.openpreservation.org/artifactory/vera-dev - - - - - - false - - vera-dev - Vera development - https://artifactory.openpreservation.org/artifactory/vera-dev - - - - diff --git a/examples/java/maven-example/src/main/java/org/example/maven/Main.java b/examples/java/maven-example/src/main/java/org/example/maven/Main.java deleted file mode 100644 index 2e89c600..00000000 --- a/examples/java/maven-example/src/main/java/org/example/maven/Main.java +++ /dev/null @@ -1,55 +0,0 @@ -package org.example.maven; - -import org.opendataloader.pdf.api.Config; -import org.opendataloader.pdf.api.OpenDataLoaderPDF; - -import java.io.File; -import java.io.IOException; -import java.nio.file.Paths; - -public class Main { - public static void main(String[] args) { - // This example shows how to process a PDF file using the opendataloader-pdf-core library. - // It uses a sample PDF file located in the project root. - // The PDF file path can be changed as needed. - - // Set the relative path to the input PDF file based on the project's root directory. - // Since this class is run from the 'examples/java/maven-example' directory, we need to move up to the parent directory. - String pdfFilePath = Paths.get("..", "..", "..", "samples", "pdf", "2408.02509v1.pdf").toString(); - File pdfFile = new File(pdfFilePath); - - if (!pdfFile.exists()) { - System.err.println("PDF file does not exist: " + pdfFile.getAbsolutePath()); - System.exit(1); - } - - // Set the output directory to the 'target' folder. - File outputDir = new File("target"); - if (!outputDir.exists()) { - outputDir.mkdirs(); - } - - try { - System.out.println("Processing PDF file: " + pdfFile.getAbsolutePath()); - - // Configure the library settings. - Config config = new Config(); - // Set the output folder to the 'target' directory. - config.setOutputFolder(outputDir.getAbsolutePath()); - // Set to generate JSON, Markdown, and annotated PDF results. - config.setGenerateJSON(true); - config.setGenerateMarkdown(true); - config.setGeneratePDF(true); - - // Process the PDF file using OpenDataLoaderPDF. - OpenDataLoaderPDF.processFile(pdfFile.getAbsolutePath(), config); - - System.out.println("Processing finished successfully."); - System.out.println("Check the output files in: " + outputDir.getAbsolutePath()); - - } catch (IOException e) { - e.printStackTrace(); - System.exit(1); - } - } -}