diff --git a/examples/temperature_dataset_labeled_example.yaml b/examples/temperature_dataset_labeled_example.yaml
new file mode 100644
index 0000000..bb6766c
--- /dev/null
+++ b/examples/temperature_dataset_labeled_example.yaml
@@ -0,0 +1,171 @@
+id: https://example.org/arrays
+name: arrays-temperature-example-2
+title: Array Temperature Example Using NDArray Classes
+description: |-
+  Example LinkML schema to demonstrate a 3D DataArray of temperature values with labeled axes
+  using classes containing arrays for the axes and data instead of using array slots/attributes.
+  Creating separate types for the array slots enables reuse and extension.
+license: MIT
+
+prefixes:
+  linkml: https://w3id.org/linkml/
+  wgs84: http://www.w3.org/2003/01/geo/wgs84_pos#
+  example: https://example.org/
+
+default_prefix: example
+
+imports:
+  - linkml:types
+
+classes:
+
+  Container:
+    tree_root: true
+    description: A container for a temperature dataset
+    attributes:
+      name:
+        identifier: true
+        range: string
+      temperature_dataset:
+        range: TemperatureDataset
+        required: true
+        inlined: true
+      latitude_series:
+        range: LatitudeInDegSeries
+        required: true
+        inlined: true
+      longitude_series:
+        range: LongitudeInDegSeries
+        required: true
+        inlined: true
+
+  TemperatureDataset:
+    tree_root: true  # NOTE(review): Container is also marked tree_root; confirm that two roots are intended
+    implements:
+      - linkml:DataArray
+    # Maps to Xarray's DataArray https://docs.xarray.dev/en/stable/user-guide/data-structures.html#dataarray
+    attributes:
+      name:
+        identifier: true
+        range: string
+      latitude_in_deg:
+        range: LatitudeInDegSeries  # schema requires this data not to be inlined
+        # RULE: Any referenced class used as a coordinate must have exactly one attribute containing the `array` key.
+        required: true
+      longitude_in_deg:
+        range: LongitudeInDegSeries
+        required: true
+      date:
+        range: DateSeries
+        required: true
+        inlined: true  # this could also be not inlined, but for the sake of the example it is inlined
+      day_in_d:
+        range: DaysInDSinceSeries
+        inlined: true
+        # In the Xarray example (https://docs.xarray.dev/en/stable/user-guide/data-structures.html#coordinates),
+        # for coordinates of a DataArray, there is a non-dimension (constant) coordinate called `reference_time`
+        # that serves as the reference time for the dimension coordinate called `time`. This reference time seems
+        # more appropriate as an attribute on the `time` array, which can be done in LinkML, but not in Xarray where
+        # `time` is just an array. Xarray states that it does not make any direct use of the values associated with
+        # non-dimension coordinates, and this example of `reference_time` is better served in a different way
+        # in LinkML. Therefore, we omit support for non-dimension coordinates in the LinkML DataArray spec.
+      temperatures_in_K:
+        range: TemperaturesInKMatrix
+        required: true
+        inlined: true
+        # Place the `coordinates` key into the `temperatures_in_K` attribute instead of on the `TemperaturesInKMatrix`
+        # class because the labeling is specific to the attribute within this `TemperatureDataset` class.
+        # RULE: Exactly one attribute within a DataArray class must contain the `coordinates` key.
+        # After merging, the `coordinates` key will be at the same level as `inlined`.
+        annotations:
+          coordinates:  # OR indexed_by or labeled_by or array_labeled_by
+            - alias: lat
+              coordinate_slot: latitude_in_deg  # RULE: The value must match the name of an attribute within the parent class
+              coordinate_dimensions: [0, 1]  # Dimension (axis) indices can be used
+            - alias: lon
+              coordinate_slot: longitude_in_deg
+              coordinate_dimensions: ["x", "y"]  # Dimension aliases can be used
+            - alias: date
+              coordinate_slot: date
+              coordinate_dimensions: [2]
+            - alias: day
+              coordinate_slot: day_in_d
+              coordinate_dimensions: [2]
+
+  LatitudeInDegSeries:
+    description: A 2D array whose values represent latitude
+    attributes:
+      name:
+        identifier: true  # an identifier is required for referencing in other classes
+        range: string
+      values:
+        required: true
+        multivalued: true
+        range: float
+        unit:
+          ucum_code: deg
+        array:  # exactly one attribute within this class must be an array
+          exact_number_dimensions: 2
+
+  LongitudeInDegSeries:
+    description: A 2D array whose values represent longitude
+    attributes:
+      name:
+        identifier: true
+        range: string
+      values:
+        required: true
+        multivalued: true
+        range: float
+        unit:
+          ucum_code: deg
+        array:
+          exact_number_dimensions: 2
+
+  DateSeries:
+    description: A 1D series of dates
+    attributes:
+      values:
+        required: true
+        multivalued: true
+        range: string  # In this example, we use a string to represent the date, e.g., "2020-01-01"
+        array:
+          exact_number_dimensions: 1
+
+  DaysInDSinceSeries:
+    description: A 1D series whose values represent the number of days since a reference date
+    attributes:
+      values:
+        required: true
+        multivalued: true
+        range: integer
+        unit:
+          ucum_code: d
+        array:
+          exact_number_dimensions: 1
+      reference_date:
+        description: The reference date for the `day_in_d` values
+        required: true
+        range: string  # for now, we are using a string to represent a date
+
+  TemperaturesInKMatrix:
+    description: A 3D array of temperatures
+    attributes:
+      # no name because this should not be directly referenced
+      conversion_factor:
+        description: A conversion factor to apply to the temperature values
+        range: float
+        unit:
+          ucum_code: K
+      values:
+        required: true
+        multivalued: true
+        range: float
+        unit:
+          ucum_code: K
+        array:
+          exact_number_dimensions: 3
+          dimensions:
+            - alias: "x"
+            - alias: "y"
+            - alias: "date"
diff --git a/tests/input/schema/rgb_image_array.yaml b/tests/input/schema/rgb_image_array.yaml
new file mode 100644
index 0000000..1630585
--- /dev/null
+++ b/tests/input/schema/rgb_image_array.yaml
@@ -0,0 +1,50 @@
+id: https://example.org/arrays
+name: arrays-temperature-example  # NOTE(review): same name as the temperature schema; confirm before renaming
+title: RGB Image Array Example
+description: |-
+  Example LinkML schema to demonstrate a 3D array of RGB image values with a fixed-size color dimension
+license: MIT
+
+prefixes:
+  linkml: https://w3id.org/linkml/
+  wgs84: http://www.w3.org/2003/01/geo/wgs84_pos#
+  example: https://example.org/
+
+default_prefix: example
+
+imports:
+  - linkml:types
+
+classes:
+
+  # concept is also useful for dates - splitting year, month, day
+  # like a compound type / structured array
+  RGBTuple:
+    description: A tuple of red, green, and blue values
+    attributes:
+      red:
+        range: float
+      green:
+        range: float
+      blue:
+        range: float
+
+  RGBImage:
+    attributes:
+      rgb:
+        range: float
+        array:
+          # NPtyping: NDArray[Shape["* x, * y, 3 rgb"], Float]
+          exact_number_dimensions: 3
+          dimensions:
+            - alias: "x"
+            - alias: "y"
+            - alias: "rgb"
+              exact_cardinality: 3
+              description: r, g, b values
+        # annotations:
+        #   binds: RGBTuple
+        #   coords:
+        #     - red: 0
+        #     - green: 1
+        #     - blue: 2
diff --git a/tests/input/schema/temperature_dataset_complex.yaml b/tests/input/schema/temperature_dataset_complex.yaml
new file mode 100644
index 0000000..4af9518
--- /dev/null
+++ b/tests/input/schema/temperature_dataset_complex.yaml
@@ -0,0 +1,97 @@
+id: https://example.org/arrays
+name: arrays-temperature-example
+title: Array Temperature Example
+description: |-
+  Example LinkML schema to demonstrate a 3D DataArray of temperature values with labeled axes
+license: MIT
+
+prefixes:
+  linkml: https://w3id.org/linkml/
+  wgs84: http://www.w3.org/2003/01/geo/wgs84_pos#
+  example: https://example.org/
+
+default_prefix: example
+
+imports:
+  - linkml:types
+
+classes:
+
+  TemperatureDataset:
+    tree_root: true
+    implements:
+      - linkml:DataArray
+    annotations:
+      array_data_mapping:
+        # See also the xarray DataArray data structure
+        # https://docs.xarray.dev/en/latest/user-guide/data-structures.html#dataarray
+        # with the main differences being
+        # 1) the coordinates are not DataArray objects
+        # 2) scalar coordinates are not supported (these represent a property of the
+        #    entire array, independent of index)
+        #
+        data: temperatures_in_K
+
+        # The 3 dimensions of the array attribute "temperatures_in_K" are named
+        # "x", "y", and "t" in the DataArray. These dims do not need to be the same as the
+        # dimensions of the array attribute. A name must be provided for each dimension
+        # of the data array.
+        dims: ["x", "y", "t"]  # NOTE: y without quotes is parsed as True in YAML 1.1
+
+        # An array attribute within this class that is not the data array above may
+        # serve as a coordinate for a set of dimensions in the data array.
+        # In most cases, a 1D array is a coordinate for a single dimension in the
+        # data array, but it is possible to have an N-dimensional array that is a
+        # coordinate for N dimensions in the data array. The format is:
+        #   <coordinate attribute name>: <list of dimension names>
+        # The number of dimensions of a coordinate must equal the length of the
+        # list specified here.
+        coords:
+          # Here, the latitude for the temperature value at index (i,j,k) is equal to
+          # latitude_in_deg[i,j]. Similarly, the longitude for the temperature value at
+          # index (i,j,k) is equal to longitude_in_deg[i,j]. The date for the temperature
+          # value at index (i,j,k) is equal to date_in_d[k]. The days_with_rain for the
+          # temperature value at index (i,j,k) is equal to days_with_rain[k].
+          latitude_in_deg: ["x", "y"]
+          longitude_in_deg: ["x", "y"]
+          date_in_d: ["t"]
+          days_with_rain: ["t"]
+    attributes:
+      name:
+        identifier: true
+        range: string
+      latitude_in_deg:
+        required: true
+        range: float
+        unit:
+          ucum_code: deg
+        array:
+          exact_number_dimensions: 2
+      longitude_in_deg:
+        required: true
+        range: float
+        unit:
+          ucum_code: deg
+        array:
+          exact_number_dimensions: 2
+      date_in_d:
+        required: true
+        range: date
+        array:
+          exact_number_dimensions: 1
+      days_with_rain:
+        required: true
+        range: boolean
+        array:
+          exact_number_dimensions: 1
+      temperatures_in_K:
+        range: float
+        required: true
+        unit:
+          ucum_code: K
+        array:
+          exact_number_dimensions: 3
+          dimensions:
+            - alias: lat
+            - alias: lon
+            - alias: date