diff --git a/java/opendataloader-pdf-core/src/test/java/org/opendataloader/pdf/IntegrationTest.java b/java/opendataloader-pdf-core/src/test/java/org/opendataloader/pdf/IntegrationTest.java index e55fc24f..e0ddb6d8 100644 --- a/java/opendataloader-pdf-core/src/test/java/org/opendataloader/pdf/IntegrationTest.java +++ b/java/opendataloader-pdf-core/src/test/java/org/opendataloader/pdf/IntegrationTest.java @@ -28,26 +28,22 @@ public class IntegrationTest { static Stream integrationTestParams() { return Stream.of( - Arguments.of("1901.03003.pdf") - ); + Arguments.of("lorem.pdf")); } @ParameterizedTest(name = "{index}: ({0}) => {0}") @MethodSource("integrationTestParams") public void test(String fileName) throws IOException { - Path pdfPath = Paths.get("../../resources", fileName); - Path jsonPath = Paths.get("../../resources", fileName.replace(".pdf", ".json")); + Path pdfPath = Paths.get("../../samples/pdf", fileName); + Path jsonPath = Paths.get("../../samples/json", fileName.replace(".pdf", ".json")); File pdfFile = pdfPath.toFile(); File jsonFile = jsonPath.toFile(); Config config = new Config(); - config.setGenerateMarkdown(true); - config.setGenerateHtml(true); - config.setGeneratePDF(true); - config.setOutputFolder("../../resources/temp"); + config.setOutputFolder("../../samples/temp"); DocumentProcessor.processFile(pdfFile.getAbsolutePath(), config); - Path resultPath = Paths.get("../../resources/temp", fileName.replace(".pdf", ".json")); + Path resultPath = Paths.get("../../samples/temp", fileName.replace(".pdf", ".json")); File resultJson = resultPath.toFile(); ObjectMapper mapper = new ObjectMapper(); diff --git a/samples/json/lorem.json b/samples/json/lorem.json new file mode 100644 index 00000000..6f830274 --- /dev/null +++ b/samples/json/lorem.json @@ -0,0 +1,29 @@ +{ + "file name" : "lorem.pdf", + "number of pages" : 1, + "author" : "leebd-public", + "title" : null, + "creation date" : "D:20251010112501+09'00'", + "modification date" : "D:20251010112501+09'00'", + "kids" : [ { + "type" : "heading", + "id" : 1, + "level" : "Doctitle", + "page number" : 1, + "bounding box" : [ 200.891, 706.938, 394.152, 745.132 ], + "heading level" : 1, + "font" : "Pretendard-Regular", + "font size" : 32.005, + "text color" : "[0.0]", + "content" : "Lorem Ipsum" + }, { + "type" : "paragraph", + "id" : 2, + "page number" : 1, + "bounding box" : [ 85.034, 567.936, 502.306, 659.761 ], + "font" : "Pretendard-Regular", + "font size" : 9.949, + "text color" : "[0.0]", + "content" : "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum." + } ] +} \ No newline at end of file diff --git a/samples/pdf/lorem.pdf b/samples/pdf/lorem.pdf new file mode 100644 index 00000000..eb189eca Binary files /dev/null and b/samples/pdf/lorem.pdf differ