Skip to content

Commit df1ba32

Browse files
authored
Merge pull request #33 from inbo/review
Upgrade to version 0.0.4. Ready for review in ropensci/software-review#263
2 parents 4b1f2fd + d8680a7 commit df1ba32

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

56 files changed

+2301
-644
lines changed

.Rbuildignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
^_pkgdown.yml$
55
^appveyor\.yml$
66
^codemeta\.json$
7+
^.zenodo\.json$
78
^docs$
89
^man-roxygen$
910
^pkgdown$

.zenodo.json

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
{
2+
"creators": [
3+
{
4+
"affiliation": "Research Institute for Nature and Forest (INBO)",
5+
"name": "Onkelinx, Thierry",
6+
"orcid": "0000-0001-8804-4216"
7+
}
8+
],
9+
"contributors": [
10+
{
11+
"affiliation": "Research Institute for Nature and Forest (INBO)",
12+
"name": "Onkelinx, Thierry",
13+
"orcid": "0000-0001-8804-4216",
14+
"type": ["ContactPerson", "ProjectLeader"]
15+
},
16+
{
17+
"affiliation": "Research Institute for Nature and Forest (INBO)",
18+
"name": "Vanderhaeghe, Floris",
19+
"orcid": "0000-0002-6378-6229",
20+
"type": "ProjectMember"
21+
},
22+
{
23+
"name": "Research Institute for Nature and Forest (INBO)",
24+
"type": "RightsHolder"
25+
}
26+
],
27+
"keywords": [
28+
"r",
29+
"version control",
30+
"data.frame",
31+
"plain text"
32+
],
33+
"license": "GPL-3"
34+
}

DESCRIPTION

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: git2rdata
22
Title: Store and Retrieve Data.frames in a Git Repository
3-
Version: 0.0.3
3+
Version: 0.0.4
44
Authors@R: c(
55
person(
66
"Thierry", "Onkelinx", role = c("aut", "cre"),
@@ -14,7 +14,7 @@ Authors@R: c(
1414
"Research Institute for Nature and Forest",
1515
role = c("cph", "fnd"), email = "[email protected]"))
1616
Description: Make versioning of data.frame easy and efficient using git repositories.
17-
Depends: R (>= 3.4.0)
17+
Depends: R (>= 3.5.0)
1818
Imports:
1919
assertthat,
2020
git2r (>= 0.23.0),
@@ -36,12 +36,15 @@ BugReports: https://github.com/inbo/git2rdata/issues
3636
Collate:
3737
'clean_data_path.R'
3838
'git2rdata-package.R'
39+
'write_vc.R'
40+
'is_git2rdata.R'
41+
'is_git2rmeta.R'
3942
'list_data.R'
4043
'meta.R'
41-
'write_vc.R'
4244
'prune.R'
4345
'read_vc.R'
4446
'recent_commit.R'
4547
'reexport.R'
4648
'relabel.R'
49+
'upgrade_data.R'
4750
VignetteBuilder: knitr

NAMESPACE

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,12 @@
22

33
S3method(format,meta_detail)
44
S3method(format,meta_list)
5+
S3method(is_git2rdata,character)
6+
S3method(is_git2rdata,default)
7+
S3method(is_git2rdata,git_repository)
8+
S3method(is_git2rmeta,character)
9+
S3method(is_git2rmeta,default)
10+
S3method(is_git2rmeta,git_repository)
511
S3method(list_data,character)
612
S3method(list_data,default)
713
S3method(list_data,git_repository)
@@ -22,17 +28,23 @@ S3method(prune_meta,git_repository)
2228
S3method(read_vc,character)
2329
S3method(read_vc,default)
2430
S3method(read_vc,git_repository)
31+
S3method(recent_commit,default)
2532
S3method(recent_commit,git_repository)
2633
S3method(relabel,data.frame)
2734
S3method(relabel,default)
2835
S3method(relabel,list)
2936
S3method(rm_data,character)
3037
S3method(rm_data,default)
3138
S3method(rm_data,git_repository)
39+
S3method(upgrade_data,character)
40+
S3method(upgrade_data,default)
41+
S3method(upgrade_data,git_repository)
3242
S3method(write_vc,character)
3343
S3method(write_vc,default)
3444
S3method(write_vc,git_repository)
3545
export(commit)
46+
export(is_git2rdata)
47+
export(is_git2rmeta)
3648
export(list_data)
3749
export(meta)
3850
export(prune_meta)
@@ -44,6 +56,7 @@ export(relabel)
4456
export(repository)
4557
export(rm_data)
4658
export(status)
59+
export(upgrade_data)
4760
export(write_vc)
4861
importFrom(assertthat,"on_failure<-")
4962
importFrom(assertthat,assert_that)
@@ -64,6 +77,7 @@ importFrom(git2r,status)
6477
importFrom(git2r,workdir)
6578
importFrom(methods,setOldClass)
6679
importFrom(stats,setNames)
80+
importFrom(utils,packageVersion)
6781
importFrom(utils,read.table)
6882
importFrom(utils,write.table)
6983
importFrom(yaml,as.yaml)

NEWS.md

Lines changed: 68 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,47 +1,86 @@
1+
git2rdata 0.0.4 (2019-05-16)
2+
============================
3+
4+
### BREAKING FEATURES
5+
6+
* `write_vc()` stores the `git2rdata` version number to the metadata. Use `upgrade_data()` to update existing data.
7+
8+
### NEW FEATURES
9+
10+
* `read_vc()` checks the metadata hash. A mismatch results in an error.
11+
* The metadata gains a data hash. A mismatch throws a warning when reading the object. This tolerates updating the data by other software, while informing the user that such a change occurred.
12+
* `is_git2rmeta()` validates metadata.
13+
* `list_data()` lists files with valid metadata.
14+
* `rm_data()` and `prune_meta()` remove files with valid metadata. Other files are untouched.
15+
* Files with invalid metadata yield a warning with `list_data()`, `rm_data()` and `prune_meta()`.
16+
17+
### Bugfixes
18+
19+
* `write_vc()` and `relabel()` handle empty strings (`''`) in characters and factors (#24).
20+
* `read_vc()` no longer treats `#` as a comment character.
21+
* `read_vc()` handles non ASCII characters on Windows.
22+
23+
### Other changes
24+
25+
* Use a faster algorithm to detect duplicates (suggestion by @brodieG).
26+
* Improve documentation.
27+
* Fix typos in documentation, vignettes and README.
28+
* Add a ROpenSci review badge to the README.
29+
* The README mentions an upper bound on the size of dataframes.
30+
* Set lifecycle to "maturing" and repo status to "active".
31+
* The functions handle `root` containing regular expressions.
32+
* Rework `vignette("workflow", package = "git2rdata")`.
33+
* Update timings in `vignette("efficiency", package = "git2rdata")`.
34+
* Minor tweaks in `vignette("plain_text", package = "git2rdata")`.
35+
36+
git2rdata 0.0.3 (2019-03-12)
37+
============================
38+
39+
* Fix typos in documentation, vignettes and README.
40+
141
git2rdata 0.0.2 (2019-02-26)
242
============================
343

444
### BREAKING CHANGES
545

6-
* metadata is added as a list to the objects rather than in YAML format.
7-
* the [yaml](https://cran.r-project.org/package=yaml) package is used to store the metadata list in YAML format.
8-
* `write_vc()` now uses the 'strict' argument instead of 'override'
9-
* the functionality `rm_data()` is split into `rm_data()` and `prune_meta()` (#9)
46+
* `meta()` appends the metadata as a list to the objects rather than in YAML format.
47+
* `yaml::write_yaml()` writes the metadata list in YAML format.
48+
* `write_vc()` now uses the 'strict' argument instead of 'override'.
49+
* `rm_data()` removes the data files. Use `prune_meta()` to remove left-over metadata files (#9).
1050

1151
### NEW FEATURES
1252

13-
* vignette on [efficiency](../articles/efficiency.html) added (#2)
14-
* existing vignette was split over three vignettes
15-
* focus on the [plain text format](../articles/plain_text.html)
16-
* focus on [version control](../articles/version_control.html)
17-
* focus on [workflows](../articles/workflow.html)
18-
* S4 methods are replaced by S3 methods (#8)
19-
* optimized factors use stable indices, resulting in smaller diffs when levels are added or removed (#13)
20-
* use `relabel()` to alter factor levels without changing their index (#13)
21-
* the raw data is written and read by base R functions instead of `readr` functions (#7)
22-
* `write_vc()` and `read_vc()` use the current working directory as default root (#6, @florisvdh)
23-
* the user can specify a string to code missing values (default = `NA`). This allows the storage of the character string `"NA"`.
53+
* Vignette on [efficiency](../articles/efficiency.html) added (#2).
54+
* Three separate vignettes instead of one large vignette.
55+
* Focus on the [plain text format](../articles/plain_text.html).
56+
* Focus on [version control](../articles/version_control.html).
57+
* Focus on [workflows](../articles/workflow.html).
58+
* S3 methods replace the old S4 methods (#8).
59+
* Optimized factors use stable indices. Adding or removing levels results in smaller diffs (#13).
60+
* Use `relabel()` to alter factor levels without changing their index (#13).
61+
* `write.table()` stores the raw data instead of `readr::write_tsv()` (#7). This avoids the `readr` dependency.
62+
* `write_vc()` and `read_vc()` use the current working directory as default root (#6, @florisvdh).
63+
* The user can specify a string to code missing values (default = `NA`). This allows the storage of the character string `"NA"`.
2464
* `write_vc()` returns a list of issues which potentially result in large diffs.
25-
* `list_data()` returns a vector with dataframes in the repository
65+
* `list_data()` returns a vector with dataframes in the repository.
2666

2767
### Other changes
2868

29-
* `write_vc()` allows to use a custom NA string
30-
* each helpfile contains a working example (#11)
31-
* README updated (#12)
32-
* Updated the rationale with links to the vignettes
33-
* `git2rdata` has a hexsticker logo
34-
* A DOI is added
35-
* The installation instructions use `remotes` and build the vignettes
36-
* `auto_commit()` was removed because of limited extra functionality over `git2r::commit()`
37-
* dataframes are read and written by base R functions instead of `readr` functions
69+
* `write_vc()` allows the use of a custom `NA` string.
70+
* Each helpfile contains a working example (#11).
71+
* README updated (#12).
72+
* Updated the rationale with links to the vignettes.
73+
* `git2rdata` has a hexsticker logo.
74+
* Add the [![DOI](https://zenodo.org/badge/147685405.svg)](https://zenodo.org/badge/latestdoi/147685405).
75+
* The installation instructions use `remotes` and build the vignettes.
76+
* We removed `auto_commit()` because of limited extra functionality over `git2r::commit()`.
3877

3978
git2rdata 0.0.1 (2018-11-12)
4079
============================
4180

4281
### NEW FEATURES
4382

44-
* use readr to write and read plain text files
45-
* allows storage of strings with "NA" or special characters
46-
* handle ordered factors
47-
* stop handling complex numbers
83+
* Use `readr` to write and read plain text files.
84+
* Allow storage of strings with "NA" or special characters.
85+
* Handle ordered factors.
86+
* Stop handling complex numbers.

R/clean_data_path.R

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,23 @@
1-
#' Clean the data path
1+
#' Clean the Data Path
22
#' Strips any file extension from the path and adds the `".tsv"` and `".yml"`
33
#' file extensions
44
#' @inheritParams write_vc
5-
#' @param normalize normalize the path? Defaults to TRUE
6-
#' @return a named vector with "raw_file" and "meta_file", refering to the
7-
#' `".tsv"` and `".yml"` files
5+
#' @param normalize Normalize the path? Defaults to TRUE
6+
#' @return A named vector with "raw_file" and "meta_file", referring to the
7+
#' `".tsv"` and `".yml"` files.
88
#' @noRd
99
#' @family internal
10+
#' @importFrom assertthat assert_that is.flag noNA
1011
clean_data_path <- function(root, file, normalize = TRUE) {
12+
assert_that(is.flag(normalize), noNA(normalize))
1113
dir_name <- dirname(file)
1214
file <- gsub("\\..*$", "", basename(file))
1315
if (dir_name == ".") {
1416
path <- file.path(root, file)
1517
} else {
1618
path <- file.path(root, dir_name, file)
1719
}
18-
if (isTRUE(normalize)) {
20+
if (normalize) {
1921
path <- normalizePath(path, winslash = "/", mustWork = FALSE)
2022
}
2123
c(raw_file = paste0(path, ".tsv"), meta_file = paste0(path, ".yml"))

R/is_git2rdata.R

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
#' Check Whether a Git2rdata Object is Valid.
2+
#'
3+
#' A valid git2rdata object has valid metadata. The data hash must match the
4+
#' data hash stored in the metadata.
5+
#' @inheritParams write_vc
6+
#' @inheritParams is_git2rmeta
7+
#' @return A logical value. `TRUE` in case of a valid git2rdata object.
8+
#' Otherwise `FALSE`.
9+
#' @rdname is_git2rdata
10+
#' @export
11+
#' @family internal
12+
#' @template example-isgit2r
13+
is_git2rdata <- function(file, root = ".",
14+
message = c("none", "warning", "error")) {
15+
UseMethod("is_git2rdata", root)
16+
}
17+
18+
#' @export
19+
is_git2rdata.default <- function(file, root, message) {
20+
stop("a 'root' of class ", class(root), " is not supported")
21+
}
22+
23+
#' @export
24+
#' @importFrom assertthat assert_that is.string
25+
#' @importFrom yaml read_yaml as.yaml
26+
#' @importFrom utils packageVersion
27+
#' @importFrom git2r hash
28+
is_git2rdata.character <- function(file, root = ".",
29+
message = c("none", "warning", "error")) {
30+
assert_that(is.string(file), is.string(root))
31+
message <- match.arg(message)
32+
root <- normalizePath(root, winslash = "/", mustWork = TRUE)
33+
check_meta <- is_git2rmeta(file = file, root = root, message = message)
34+
if (!check_meta) {
35+
return(FALSE)
36+
}
37+
file <- clean_data_path(root = root, file = file)
38+
39+
if (!file.exists(file["raw_file"])) {
40+
msg <- "Data file missing."
41+
switch(message, error = stop(msg), warning = warning(msg))
42+
return(FALSE)
43+
}
44+
45+
# read the metadata
46+
meta_data <- read_yaml(file["meta_file"])
47+
48+
correct <- names(meta_data)
49+
correct <- paste(correct[correct != "..generic"], collapse = "\t")
50+
header <- readLines(file["raw_file"], n = 1, encoding = "UTF-8")
51+
if (correct != header) {
52+
msg <- paste("Corrupt data, incorrect header. Expecting:", correct)
53+
switch(message, error = stop(msg), warning = warning(msg))
54+
return(FALSE)
55+
}
56+
57+
if (meta_data[["..generic"]][["data_hash"]] != hashfile(file[["raw_file"]])) {
58+
msg <- "Corrupt data, mismatching data hash."
59+
switch(message, error = stop(msg), warning = warning(msg))
60+
return(FALSE)
61+
}
62+
63+
return(TRUE)
64+
}
65+
66+
#' @export
67+
#' @importFrom git2r workdir
68+
#' @include write_vc.R
69+
is_git2rdata.git_repository <- function(
70+
file, root, message = c("none", "warning", "error")) {
71+
is_git2rdata(file = file, root = workdir(root), message = message)
72+
}

0 commit comments

Comments
 (0)