Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
73 commits
Select commit Hold shift + click to select a range
05aeacf
Update Horned-OWL to version 0.14
ckindermann Jul 17, 2023
8428721
Update Horned-OWL to version 1.0
ckindermann Jul 4, 2024
45e4751
Add libraries
ckindermann Aug 27, 2024
fdd3c81
Add CLI
ckindermann Aug 27, 2024
43554e5
Add modules
ckindermann Aug 27, 2024
610260a
Add init
ckindermann Aug 27, 2024
93f6336
Add export
ckindermann Aug 27, 2024
8c7d494
Add prefix
ckindermann Aug 27, 2024
6a79fb5
Update import
ckindermann Aug 27, 2024
4e13b90
Format
ckindermann Aug 27, 2024
0cd8121
Wrap IRIs in angle brackets
ckindermann Aug 27, 2024
514b7d3
Format
ckindermann Aug 27, 2024
f65b033
Wrap IRIs in angle brackets
ckindermann Aug 27, 2024
8b8221b
Omit owl:Thing and rdfs:Literal in cardinality restrictions
ckindermann Sep 4, 2024
5ea2d67
Fix translation of IRIs
ckindermann Sep 5, 2024
b5d7e8c
Fix translation of ontology version
ckindermann Sep 6, 2024
c0c86b2
Fix translation of ontology annotations
ckindermann Sep 7, 2024
bd69282
Add support for SWRL rules
ckindermann Oct 28, 2024
54724a0
Use ArcStr to enable multi-threading
ckindermann Dec 18, 2024
35db35c
First parallelization
ckindermann Dec 18, 2024
01baae7
Add rayon
ckindermann Dec 20, 2024
d4855c1
Remove nested JSON objects in subject column (using blank nodes)
ckindermann Jan 7, 2025
e6e41f9
Add escaping strategy for whitespace characters
ckindermann Jan 7, 2025
5eb3310
Fix translation of JSON objects
ckindermann Jan 13, 2025
6b06d68
Fix splitting of blank node structures
ckindermann Jan 22, 2025
a2c84ae
Add SHA library
ckindermann Jan 24, 2025
7bcd219
Remove annotations from hashes
ckindermann Jan 29, 2025
808b0a7
Change name for blank nodes
ckindermann May 6, 2025
5cffeb9
Curify LDTab instead of OFN
ckindermann May 13, 2025
8655277
Add typing triples for SWRL variables
ckindermann May 27, 2025
d7896ec
Fix CURIEs for properties
ckindermann Jun 10, 2025
762aead
Fix ontology ID axioms
ckindermann Jun 17, 2025
36afbbb
Fix HasValue translation
ckindermann Sep 30, 2025
470ba76
Fix HasKey translation
ckindermann Oct 14, 2025
8802a2a
Refactor curification
ckindermann Oct 27, 2025
d6036f8
Fix hashes for JSON object subjects
ckindermann Oct 28, 2025
d395535
Split blanknodes
ckindermann Oct 30, 2025
e8905cd
Fix annotations
ckindermann Nov 11, 2025
bb1e95c
Fix literal translation
ckindermann Nov 25, 2025
ddda6e4
Update artifact build
ckindermann Dec 2, 2025
286af60
Fix blank node IDs
ckindermann Jan 20, 2026
3186ea1
Update parser
ckindermann Jan 27, 2026
f07871c
Deduplicate code
ckindermann Jan 27, 2026
8b4dbd4
Avoid repeated regex compilation
ckindermann Jan 27, 2026
a905f89
Use struct for LDTab triples
ckindermann Feb 1, 2026
e13ea6e
Refactor monolithic function
ckindermann Feb 2, 2026
9bf443a
Remove magic strings
ckindermann Feb 2, 2026
6a65ba0
Update actions
ckindermann Feb 2, 2026
4ba9ec3
Remove unused imports
ckindermann Feb 24, 2026
a056a5a
Fix typo
ckindermann Feb 24, 2026
0ebdf21
Add helper for wrapping IRIs
ckindermann Feb 24, 2026
8db734a
Add helper for OFN-S expressions
ckindermann Feb 24, 2026
ef13716
Remove unnecessary clones
ckindermann Feb 24, 2026
0109e90
Implement AnnotationProperty translation
ckindermann Feb 24, 2026
23fbbb8
Remove deprecated code
ckindermann Feb 24, 2026
a65db83
Use ofn_list in rule translation
ckindermann Feb 24, 2026
b0a4306
Use ofn_list in hasKey translation
ckindermann Feb 24, 2026
e39f895
Compile regex once
ckindermann Feb 27, 2026
b1f2d2f
Refactor IRI extraction
ckindermann Feb 28, 2026
fa2a151
Refactor building of default fillers
ckindermann Feb 28, 2026
dac3735
Avoid unnecessary clones
ckindermann Feb 28, 2026
1b12be2
Factor out cardinality parsing
ckindermann Feb 28, 2026
6a3479c
Clean up iteration
ckindermann Feb 28, 2026
e91fc55
Avoid checking regex twice
ckindermann Feb 28, 2026
ad2e79a
Remove deprecated functions
ckindermann Feb 28, 2026
a1c2153
Fix variable name
ckindermann Feb 28, 2026
fbd0ce3
Change into_iter to iter
ckindermann Feb 28, 2026
a7d3e63
Refactor monolithic import
ckindermann Feb 28, 2026
9429901
Use matches macro
ckindermann Feb 28, 2026
252612f
Use iter rather than into_iter
ckindermann Feb 28, 2026
b56eda4
Use ArcStr in ofn_2_owl
ckindermann Feb 28, 2026
c951f4e
use LDTabTriple
ckindermann Feb 28, 2026
1ef5e02
Return Result<OWL>
ckindermann Mar 2, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 50 additions & 43 deletions .github/workflows/create-artifacts.yml
Original file line number Diff line number Diff line change
@@ -1,64 +1,71 @@
name: Create artifacts

on:
[push]
push:

permissions:
contents: read

env:
CARGO_TERM_COLOR: always

jobs:
build-binaries:
build:
strategy:
fail-fast: false
matrix:
target: [
{ runner: "macos-11", arch: "x86_64-apple-darwin" },
{ runner: "macos-11", arch: "aarch64-apple-darwin" },
{ runner: "windows-2022", arch: "x86_64-pc-windows-msvc" },
{ runner: "ubuntu-20.04", arch: "x86_64-unknown-linux-gnu" },
{ runner: "ubuntu-20.04", arch: "x86_64-unknown-linux-musl" },
]
runs-on: ${{ matrix.target.runner }}
include:
- os: macos-15-intel
target: x86_64-apple-darwin
ext: ""
- os: macos-15
target: aarch64-apple-darwin
ext: ""
- os: windows-2022
target: x86_64-pc-windows-msvc
ext: ".exe"
- os: ubuntu-24.04
target: x86_64-unknown-linux-gnu
ext: ""
- os: ubuntu-24.04
target: x86_64-unknown-linux-musl
ext: ""
use_cross: true

runs-on: ${{ matrix.os }}

steps:
- name: Check out repository code
uses: actions/checkout@v3
- uses: actions/checkout@v6

- name: Install musl-tools (MUSL)
if: ${{ matrix.target.runner == 'ubuntu-20.04' && matrix.target.arch == 'x86_64-unknown-linux-musl' }}
run: |
sudo apt-get update
sudo apt-get install musl-tools
- uses: dtolnay/rust-toolchain@stable
with:
targets: ${{ matrix.target }}

- uses: Swatinem/rust-cache@v2

- name: Install latest rust toolchain
uses: actions-rs/toolchain@v1
- name: Install cross (MUSL only)
if: ${{ matrix.use_cross }}
uses: taiki-e/install-action@v2
with:
toolchain: stable
target: ${{ matrix.target.arch }}
default: true
override: true
tool: cross

- name: Build binary using cross (Linux)
if: ${{ matrix.target.runner == 'ubuntu-20.04' }}
- name: Build + package
shell: bash
run: |
cargo install cross --git https://github.com/cross-rs/cross
sudo systemctl start docker
cross build --release --target ${{ matrix.target.arch }}
cp target/${{ matrix.target.arch }}/release/ldtab ldtab-${{ matrix.target.arch }}
set -euxo pipefail

- name: Build binary using cargo (MacOS)
if: ${{ matrix.target.runner == 'macos-11' }}
run: |
cargo build --release --target ${{ matrix.target.arch }}
cp target/${{ matrix.target.arch }}/release/ldtab ldtab-${{ matrix.target.arch }}
if [[ "${{ matrix.use_cross }}" == "true" ]]; then
cross build --release --target "${{ matrix.target }}"
else
cargo build --release --target "${{ matrix.target }}"
fi

- name: Build binary using cargo (Windows)
if: ${{ matrix.target.runner == 'windows-2022' }}
run: |
cargo build --release --target ${{ matrix.target.arch }}
cp target\${{ matrix.target.arch }}\release\ldtab.exe ldtab-${{ matrix.target.arch }}.exe
mkdir -p dist
cp "target/${{ matrix.target }}/release/ldtab${{ matrix.ext }}" \
"dist/ldtab-${{ matrix.target }}${{ matrix.ext }}"

- name: Upload binary artifacts
uses: actions/upload-artifact@v3
- uses: actions/upload-artifact@v6
with:
name: ${{ matrix.target.arch }}-bin
path: ldtab-${{ matrix.target.arch }}
name: ${{ matrix.target }}-bin
path: dist/ldtab-${{ matrix.target }}${{ matrix.ext }}
if-no-files-found: error
11 changes: 10 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,17 @@ wiring_rs = { path = "../wiring.rs" }
serde = { version = "1.0", features = ["derive"] }
regex = "1.6.0"
serde_json = "1.0"
horned-owl = "0.12.0"
sha2 = "0.10"
horned-owl = "1.0.0"
horned-bin = "1.0.0"
im = "15.1.0"
itertools = "0.10.3"
rayon = "1.7"
rio_api = "0.7.1"
rio_xml = "0.7.3"
clap = { version = "4.5.11", features = ["cargo", "derive"] }
sqlx = { version = "0.7", features = ["sqlite", "runtime-tokio-native-tls"] }
tokio = { version = "1", features = ["full"] }
anyhow = "1.0.86"
csv = "1.1"
once_cell = "1.17.1"
75 changes: 75 additions & 0 deletions src/export/export.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
use anyhow::{Context, Result};
use clap::ArgMatches;
use sqlx::{sqlite::SqlitePoolOptions, Row};
use std::fs::OpenOptions;
use std::io::Write;

/// Escapes newline and tab characters so a value fits on one line of a
/// tab-separated file.
///
/// `'\n'` becomes the two characters `\n` and `'\t'` becomes `\t`. Every other
/// character (including carriage returns, spaces and backslashes) is copied
/// through unchanged.
///
/// Note: because backslashes are not escaped, an input that already contains a
/// literal backslash followed by `n` or `t` is indistinguishable from an
/// escaped newline/tab in the output.
fn escape_whitespace(input: &str) -> String {
    // One buffer for the whole result instead of a temporary `String` per
    // character; the output is at least as long as the input.
    let mut escaped = String::with_capacity(input.len());
    for c in input.chars() {
        match c {
            '\n' => escaped.push_str("\\n"),
            '\t' => escaped.push_str("\\t"),
            // Other whitespace characters are intentionally left as-is.
            _ => escaped.push(c),
        }
    }
    escaped
}

pub async fn export(sub_matches: &ArgMatches) -> Result<()> {
let database = sub_matches.get_one::<String>("database");
let output = sub_matches.get_one::<String>("output");

//these unwraps are safe
let database_path = database.unwrap();
let output_path = output.unwrap();

let pool = SqlitePoolOptions::new()
.max_connections(5)
.connect(database_path)
.await
.context("Failed to connect to the database")?;

//load data
let rows = sqlx::query(
r#"
SELECT * FROM statement
"#,
)
.fetch_all(&pool)
.await
.context("Failed to fetch rows from the table")?;

let mut file = OpenOptions::new()
.append(true)
.create(true)
.open(output_path)?;

let header = "assertion\tretraction\tgraph\tsubject\tpredicate\tobject\tdatatype\tannotation";
file.write_all(header.as_bytes())?;
file.write_all(b"\n")?;

for row in rows {
let assertion: i32 = row.get("assertion");
let retraction: i32 = row.get("retraction");
let graph: String = row.get("graph");
let subject: String = row.get("subject");
let predicate: String = row.get("predicate");
let object: String = escape_whitespace(row.get("object"));
let datatype: String = row.get("datatype");
let annotation: String = row.get("annotation");

let line = format!(
"{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}",
assertion, retraction, graph, subject, predicate, object, datatype, annotation
);

file.write_all(line.as_bytes())?;
file.write_all(b"\n")?;
}
Ok(())
}
1 change: 1 addition & 0 deletions src/export/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
pub mod export;
154 changes: 154 additions & 0 deletions src/import/curify.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
use once_cell::sync::Lazy;
use regex::Regex;
use serde_json::Value;
use sha2::{Digest, Sha256};
use std::collections::HashMap;

use super::triple::LdTabTriple;

/// Lazily compiled pattern for typed literals of the form `"<value>"^^<datatype>`.
///
/// The input must start with a double quote. Capture group 1 is the text
/// between that opening quote and the last `"^^` in the input (the group is
/// greedy), and capture group 2 is everything after that `"^^`. The `(?s)` flag
/// lets `.` match newlines in both groups, so multi-line values are matched too.
///
/// The pattern is a constant, so the `unwrap` can only fail if the pattern
/// itself is edited into something invalid.
pub(crate) static DATATYPE_LITERAL_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r#"^"(?s)(.*)"\^\^(.*)$"#).unwrap());

/// Returns a copy of `triples` in which the subject, predicate, object,
/// datatype and annotation of every triple have been passed through
/// `curify_value` with `prefix_map`.
///
/// `assertion` and `retraction` are copied and `graph` is cloned unchanged.
/// The input slice is not modified and the output keeps the input order.
pub(crate) fn curify_triples(
    triples: &[LdTabTriple],
    prefix_map: &HashMap<String, String>,
) -> Vec<LdTabTriple> {
    let mut curified = Vec::with_capacity(triples.len());
    for triple in triples {
        curified.push(LdTabTriple {
            assertion: triple.assertion,
            retraction: triple.retraction,
            graph: triple.graph.clone(),
            subject: curify_value(&triple.subject, prefix_map),
            predicate: curify_value(&triple.predicate, prefix_map),
            object: curify_value(&triple.object, prefix_map),
            datatype: curify_value(&triple.datatype, prefix_map),
            annotation: curify_value(&triple.annotation, prefix_map),
        });
    }
    curified
}

/// Recursively rewrites a JSON value, applying `replace_substrings` to every
/// string it contains.
///
/// Arrays are rebuilt element by element. For objects both the keys and the
/// values are rewritten. Any non-string scalar is cloned unchanged.
fn curify_value(ldtab: &Value, iri2prefix: &HashMap<String, String>) -> Value {
    match ldtab {
        Value::String(text) => Value::String(replace_substrings(text, iri2prefix)),
        Value::Array(items) => Value::Array(
            items
                .iter()
                .map(|item| curify_value(item, iri2prefix))
                .collect(),
        ),
        Value::Object(entries) => Value::Object(
            entries
                .iter()
                .map(|(key, nested)| {
                    (
                        replace_substrings(key, iri2prefix),
                        curify_value(nested, iri2prefix),
                    )
                })
                .collect(),
        ),
        other => other.clone(),
    }
}

/// Derives a blank-node identifier of the form `<ldtab:blanknode:HEX>` from
/// the content of `value`.
///
/// The value is first expanded with `uncurify_value`, then passed through
/// `sort_value` (presumably to canonicalise ordering -- confirm against
/// `wiring_rs`), serialised to a JSON string and hashed with SHA-256. `HEX` is
/// the lower-case hexadecimal digest.
pub(crate) fn generate_blank_node_id(value: &Value, prefix2iri: &HashMap<String, String>) -> String {
    let canonical_json =
        wiring_rs::ofn_2_ldtab::util::sort_value(&uncurify_value(value, prefix2iri)).to_string();

    // One-shot hashing; same digest as update() followed by finalize().
    let digest = Sha256::digest(canonical_json.as_bytes());

    format!("<ldtab:blanknode:{:x}>", digest)
}

/// Reports whether `s` is wrapped in angle brackets, i.e. its first character
/// is `<` and its last character is `>`.
///
/// The empty string and the single characters `<` and `>` are not full IRIs.
pub(crate) fn is_full_iri(s: &str) -> bool {
    // Both delimiters are ASCII, so comparing the first and last byte is
    // equivalent to comparing the first and last character.
    let bytes = s.as_bytes();
    bytes.first() == Some(&b'<') && bytes.last() == Some(&b'>')
}

/// Builds the reverse of a namespace-to-prefix map: every `(iri_base, prefix)`
/// entry becomes a `(prefix, iri_base)` entry in the result.
///
/// If several namespaces share the same prefix only one of them survives;
/// which one depends on the iteration order of the input map.
pub(crate) fn invert_prefix_map(iri2prefix: &HashMap<String, String>) -> HashMap<String, String> {
    let mut prefix2iri = HashMap::with_capacity(iri2prefix.len());
    for (iri_base, prefix) in iri2prefix {
        prefix2iri.insert(prefix.clone(), iri_base.clone());
    }
    prefix2iri
}

/// Tries to shorten an angle-bracketed IRI to a CURIE (`prefix:local`).
///
/// `iri` must have the form `<...>`; anything else (an empty string, an
/// existing CURIE, a bare IRI without brackets) yields `None` rather than
/// being sliced blindly. `iri2prefix` maps namespace IRIs to their prefixes.
/// When several namespaces are a prefix of the IRI, the longest one is used so
/// the result does not depend on hash-map iteration order.
///
/// Returns `None` when the input is not bracketed or no namespace matches.
fn try_curify_iri(iri: &str, iri2prefix: &HashMap<String, String>) -> Option<String> {
    // Remove the angle brackets, bailing out if either one is missing.
    let trimmed = iri.strip_prefix('<')?.strip_suffix('>')?;

    iri2prefix
        .iter()
        .filter(|(namespace, _)| trimmed.starts_with(namespace.as_str()))
        // Two distinct namespaces that both prefix the same IRI always differ
        // in length, so the maximum is unique.
        .max_by_key(|(namespace, _)| namespace.len())
        .map(|(namespace, prefix)| format!("{}:{}", prefix, &trimmed[namespace.len()..]))
}

/// Curifies a single string.
///
/// * A full IRI (`<...>`) is replaced by its CURIE when a namespace matches.
/// * A typed literal (`"value"^^datatype`) keeps its value and has only the
///   datatype part curified.
/// * Anything else, or anything for which no namespace matches, is returned
///   unchanged.
fn replace_substrings(input: &str, iri2prefix: &HashMap<String, String>) -> String {
    let curified = if is_full_iri(input) {
        try_curify_iri(input, iri2prefix)
    } else {
        DATATYPE_LITERAL_REGEX.captures(input).and_then(|caps| {
            try_curify_iri(&caps[2], iri2prefix)
                .map(|datatype| format!("\"{}\"^^{}", &caps[1], datatype))
        })
    };

    curified.unwrap_or_else(|| input.to_string())
}

/// Recursively rewrites a JSON value, applying `expand_curies` to every string
/// it contains.
///
/// Arrays are rebuilt element by element. For objects both the keys and the
/// values are rewritten. Any non-string scalar is cloned unchanged.
fn uncurify_value(ldtab: &Value, prefix2iri: &HashMap<String, String>) -> Value {
    match ldtab {
        Value::String(text) => Value::String(expand_curies(text, prefix2iri)),
        Value::Array(items) => Value::Array(
            items
                .iter()
                .map(|item| uncurify_value(item, prefix2iri))
                .collect(),
        ),
        Value::Object(entries) => Value::Object(
            entries
                .iter()
                .map(|(key, nested)| {
                    (
                        expand_curies(key, prefix2iri),
                        uncurify_value(nested, prefix2iri),
                    )
                })
                .collect(),
        ),
        other => other.clone(),
    }
}

/// Expands the CURIE in a single string to a full, angle-bracketed IRI.
///
/// The whole input is tried as a CURIE first. If that fails and the input is a
/// typed literal (`"value"^^datatype`), only the datatype part is expanded.
/// When neither applies the input is returned unchanged.
fn expand_curies(input: &str, prefix2iri: &HashMap<String, String>) -> String {
    expand_curie_to_iri(input, prefix2iri)
        .or_else(|| {
            let caps = DATATYPE_LITERAL_REGEX.captures(input)?;
            let expanded_dtype = expand_curie_to_iri(&caps[2], prefix2iri)?;
            Some(format!("\"{}\"^^{}", &caps[1], expanded_dtype))
        })
        .unwrap_or_else(|| input.to_string())
}

/// Expands `prefix:local` to `<iri_base + local>` using `prefix2iri`.
///
/// Returns `None` when `curie` is already a full IRI, contains no `:`, or uses
/// a prefix that is not in the map. Only the first `:` separates prefix from
/// local part; later colons stay in the local part.
fn expand_curie_to_iri(curie: &str, prefix2iri: &HashMap<String, String>) -> Option<String> {
    if is_full_iri(curie) {
        return None;
    }

    let (prefix, local) = curie.split_once(':')?;
    prefix2iri
        .get(prefix)
        .map(|iri_base| format!("<{}{}>", iri_base, local))
}
Loading
Loading