/*
  GenericsAPI/KBaseMatrices.spec (kbaseapps/GenericsAPI, master branch)
*/
/*
@author jjeffryes
*/
module KBaseMatrices{
    /*
      Workspace reference to a Genome data object (type KBaseGenomes.Genome).
      @id ws KBaseGenomes.Genome
    */
    typedef string ws_genome_id;

    /*
      Reference to a handle ID.
      @id handle
    */
    typedef string handle_ref;

    /*
      Generic workspace reference to a data object.  The @id annotation names
      no type, so the type of the referenced object is not restricted here.
      @id ws
    */
    typedef string ws_ref;

    /*
      Workspace reference to an AttributeMapping data object
      (type KBaseExperiments.AttributeMapping).
      @id ws KBaseExperiments.AttributeMapping
    */
    typedef string ws_attributemapping_id;

    /*
      A simple 2D matrix of floating point numbers with labels/ids for rows and
      columns.  The matrix is stored as a list of lists: the outer list holds
      the rows, and each inner list holds that row's value for every column.
      Row ids and column ids should each be unique.

      row_ids - unique ids for rows.
      col_ids - unique ids for columns.
      values - two dimensional array indexed as: values[row][col]
      @metadata ws length(row_ids) as n_rows
      @metadata ws length(col_ids) as n_cols
    */
    typedef structure {
      list<string> row_ids;
      list<string> col_ids;
      list<list<float>> values;
    } FloatMatrix2D;

    /*
      Workspace reference to a differential expression matrix object, of type
      KBaseMatrices.DifferentialExpressionMatrix or
      KBaseFeatureValues.DifferentialExpressionMatrix.
      @id ws KBaseMatrices.DifferentialExpressionMatrix KBaseFeatureValues.DifferentialExpressionMatrix
    */
    typedef string differential_expression_matrix_ref;

    /*
      A wrapper around a FloatMatrix2D designed for simple matrices of Expression
      data.  Rows map to features, and columns map to conditions.  The data type
      includes some information about normalization factors and contains
      mappings from row ids to features and col ids to conditions.

      KBaseMatrices Fields:
      description - short optional description of the dataset
      scale - raw, ln, log2, log10
      col_normalization - mean_center, median_center, mode_center, zscore
      row_normalization - mean_center, median_center, mode_center, zscore
      col_mapping - map from col_id to an instance id in the AttributeMapping
          referenced by col_attributemapping_ref
      row_mapping - map from row_id to an instance id in the AttributeMapping
          referenced by row_attributemapping_ref
      col_attributemapping_ref - a reference to an AttributeMapping that relates to the columns
      row_attributemapping_ref - a reference to an AttributeMapping that relates to the rows
      attributes - a mapping of additional information pertaining to the object
      search_attributes - a list of object information used by search
      data - contains values for (feature,condition) pairs, where
          features correspond to rows and conditions are columns
          (ie data.values[feature][condition])

      Additional Fields:
      genome_ref - a reference to the aligned genome
      feature_mapping - map from row_id to feature id in the genome
      diff_expr_matrix_ref - reference to the differential expression matrix that was
          used to filter this expression matrix

      Validation:
      @unique data.row_ids
      @unique data.col_ids
      @conditionally_required row_attributemapping_ref row_mapping
      @conditionally_required col_attributemapping_ref col_mapping
      @contains data.row_ids row_mapping
      @contains data.col_ids col_mapping
      @contains values(row_mapping) row_attributemapping_ref:instances
      @contains values(col_mapping) col_attributemapping_ref:instances
      @contains data.row_ids genome_ref:features.[*].id genome_ref:mrnas.[*].id genome_ref:cdss.[*].id genome_ref:non_coding_features.[*].id
      @contains values(feature_mapping) genome_ref:features.[*].id genome_ref:mrnas.[*].id genome_ref:cdss.[*].id genome_ref:non_coding_features.[*].id

      @optional description row_normalization col_normalization
      @optional col_mapping row_mapping col_attributemapping_ref row_attributemapping_ref
      @optional attributes search_attributes genome_ref feature_mapping diff_expr_matrix_ref

      @metadata ws scale
      @metadata ws row_normalization
      @metadata ws col_normalization
      @metadata ws genome_ref as genome
      @metadata ws col_attributemapping_ref as col_attribute_mapping
      @metadata ws row_attributemapping_ref as row_attribute_mapping
      @metadata ws length(data.row_ids) as feature_count
      @metadata ws length(data.col_ids) as condition_count
    */
    typedef structure {
      string description;
      string scale;
      string row_normalization;
      string col_normalization;
      mapping<string, string> col_mapping;
      ws_attributemapping_id col_attributemapping_ref;
      mapping<string, string> row_mapping;
      ws_attributemapping_id row_attributemapping_ref;
      mapping<string, string> attributes;
      list<string> search_attributes;
      ws_genome_id genome_ref;
      mapping<string, string> feature_mapping;
      differential_expression_matrix_ref diff_expr_matrix_ref;
      FloatMatrix2D data;
    } ExpressionMatrix;

    /*
      A wrapper around a FloatMatrix2D designed for simple matrices of Differential
      Expression data.  Rows map to features, and columns map to conditions.  The
      data type includes some information about normalization factors and contains
      mappings from row ids to features and col ids to conditions.

      KBaseMatrices Fields:
      description - short optional description of the dataset
      scale - raw, ln, log2, log10
      col_normalization - mean_center, median_center, mode_center, zscore
      row_normalization - mean_center, median_center, mode_center, zscore
      col_mapping - map from col_id to an instance id in the AttributeMapping
          referenced by col_attributemapping_ref
      row_mapping - map from row_id to an instance id in the AttributeMapping
          referenced by row_attributemapping_ref
      col_attributemapping_ref - a reference to an AttributeMapping that relates to the columns
      row_attributemapping_ref - a reference to an AttributeMapping that relates to the rows
      attributes - a mapping of additional information pertaining to the object
      search_attributes - a list of object information used by search

      data - contains values for (feature,condition) pairs, where
          features correspond to rows and conditions are columns
          (ie data.values[feature][condition])

      Additional Fields:
      genome_ref - a reference to the aligned genome
      feature_mapping - map from row_id to feature id in the genome

      Validation:
      @unique data.row_ids
      @unique data.col_ids
      @conditionally_required row_attributemapping_ref row_mapping
      @conditionally_required col_attributemapping_ref col_mapping
      @contains data.row_ids row_mapping
      @contains data.col_ids col_mapping
      @contains values(row_mapping) row_attributemapping_ref:instances
      @contains values(col_mapping) col_attributemapping_ref:instances
      @contains data.row_ids genome_ref:features.[*].id genome_ref:mrnas.[*].id genome_ref:cdss.[*].id genome_ref:non_coding_features.[*].id
      @contains values(feature_mapping) genome_ref:features.[*].id genome_ref:mrnas.[*].id genome_ref:cdss.[*].id genome_ref:non_coding_features.[*].id

      @optional description row_normalization col_normalization
      @optional col_mapping row_mapping col_attributemapping_ref row_attributemapping_ref
      @optional attributes search_attributes genome_ref feature_mapping

      @metadata ws scale
      @metadata ws row_normalization
      @metadata ws col_normalization
      @metadata ws genome_ref as genome
      @metadata ws col_attributemapping_ref as col_attribute_mapping
      @metadata ws row_attributemapping_ref as row_attribute_mapping
      @metadata ws length(data.row_ids) as feature_count
      @metadata ws length(data.col_ids) as condition_count
    */
    typedef structure {
      string description;
      string scale;
      string row_normalization;
      string col_normalization;
      mapping<string, string> col_mapping;
      ws_attributemapping_id col_attributemapping_ref;
      mapping<string, string> row_mapping;
      ws_attributemapping_id row_attributemapping_ref;
      mapping<string, string> attributes;
      list<string> search_attributes;
      ws_genome_id genome_ref;
      mapping<string, string> feature_mapping;
      FloatMatrix2D data;
    } DifferentialExpressionMatrix;

    /*
      A wrapper around a FloatMatrix2D designed for simple matrices of Fitness data
      for gene/feature knockouts.  Generally fitness is measured as growth rate
      for the knockout strain relative to wildtype.

      KBaseMatrices Fields:
      description - short optional description of the dataset
      scale - raw, ln, log2, log10
      col_normalization - mean_center, median_center, mode_center, zscore
      row_normalization - mean_center, median_center, mode_center, zscore
      col_mapping - map from col_id to an instance id in the AttributeMapping
          referenced by col_attributemapping_ref
      row_mapping - map from row_id to an instance id in the AttributeMapping
          referenced by row_attributemapping_ref
      col_attributemapping_ref - a reference to an AttributeMapping that relates to the columns
      row_attributemapping_ref - a reference to an AttributeMapping that relates to the rows
      attributes - a mapping of additional information pertaining to the object
      search_attributes - a list of object information used by search

      data - contains values for (feature,condition) pairs, where
          features correspond to rows and conditions are columns
          (ie data.values[feature][condition])

      Additional Fields:
      genome_ref - a reference to the aligned genome
      feature_mapping - map from row_id to a list of feature ids in the genome

      Validation:
      @unique data.row_ids
      @unique data.col_ids
      @conditionally_required row_attributemapping_ref row_mapping
      @conditionally_required col_attributemapping_ref col_mapping
      @contains data.row_ids row_mapping
      @contains data.col_ids col_mapping
      @contains values(row_mapping) row_attributemapping_ref:instances
      @contains values(col_mapping) col_attributemapping_ref:instances
      @contains data.row_ids genome_ref:features.[*].id genome_ref:mrnas.[*].id genome_ref:cdss.[*].id genome_ref:non_coding_features.[*].id
      @contains values(feature_mapping) genome_ref:features.[*].id genome_ref:mrnas.[*].id genome_ref:cdss.[*].id genome_ref:non_coding_features.[*].id

      @optional description row_normalization col_normalization
      @optional col_mapping row_mapping col_attributemapping_ref row_attributemapping_ref
      @optional attributes search_attributes genome_ref feature_mapping

      @metadata ws scale
      @metadata ws row_normalization
      @metadata ws col_normalization
      @metadata ws genome_ref as genome
      @metadata ws col_attributemapping_ref as col_attribute_mapping
      @metadata ws row_attributemapping_ref as row_attribute_mapping
      @metadata ws length(data.row_ids) as feature_count
      @metadata ws length(data.col_ids) as condition_count
    */
    typedef structure {
      string description;
      string scale;
      string row_normalization;
      string col_normalization;
      mapping<string, string> col_mapping;
      ws_attributemapping_id col_attributemapping_ref;
      mapping<string, string> row_mapping;
      ws_attributemapping_id row_attributemapping_ref;
      mapping<string, string> attributes;
      list<string> search_attributes;
      ws_genome_id genome_ref;
      mapping<string, list<string>> feature_mapping;
      FloatMatrix2D data;
    } FitnessMatrix;

    /*
      A wrapper around a FloatMatrix2D designed for matrices of data assigned to individual reactions.

      The columns represent experimental conditions while the rows correspond to reactions from a single
      metabolic reconstruction or from a biochemistry object.

      KBaseMatrices Fields:
      description - short optional description of the dataset
      scale - raw, ln, log2, log10

      data - contains values for (reaction,condition) pairs, where
             reactions correspond to rows and conditions are columns
             (ie data.values[reaction][condition])

      Additional Fields:
      fbamodel_ref - a reference to an FBAModel object
      biochemistry_ref - a reference to a Biochemistry object
      expression_ref - a reference to an ExpressionMatrix object (from which reaction values can be derived)
      fba_refs - a list of references to FBA objects (from which reaction fluxes can be derived)

      Validation:
      @unique data.row_ids
      @unique data.col_ids

      @optional description fbamodel_ref biochemistry_ref expression_ref fba_refs

      @metadata ws scale
      @metadata ws length(data.row_ids) as reaction_count
      @metadata ws length(data.col_ids) as condition_count
    */
    typedef structure {
      string description;
      string scale;
      list<ws_ref> fba_refs;
      ws_ref fbamodel_ref;
      ws_ref expression_ref;
      ws_ref biochemistry_ref;
      FloatMatrix2D data;
    } ReactionMatrix;

    /*
      A wrapper around a FloatMatrix2D designed for matrices of chemical concentration data. The
      columns represent experimental conditions while the rows correspond to individual
      identified metabolites.

      KBaseMatrices Fields:
      description - short optional description of the dataset
      scale - raw, ln, log2, log10
      col_normalization - mean_center, median_center, mode_center, zscore
      row_normalization - mean_center, median_center, mode_center, zscore
      col_mapping - map from col_id to an instance id in the AttributeMapping
             referenced by col_attributemapping_ref
      row_mapping - map from row_id to an instance id in the AttributeMapping
             referenced by row_attributemapping_ref
      col_attributemapping_ref - a reference to an AttributeMapping that relates to the columns
      row_attributemapping_ref - a reference to an AttributeMapping that relates to the rows
             (not listed in @optional below, so required for this type)
      attributes - a mapping of additional information pertaining to the object
      search_attributes - a list of object information used by search

      data - contains values for (compound,condition) pairs, where
             compounds correspond to rows and conditions are columns
             (ie data.values[compound][condition])

      Additional Fields:
      biochemistry_ref - a reference to a biochemistry object
             (not listed in @optional below, so required for this type)
      biochemistry_mapping - map from row_id to a list of compound ids in a biochemistry object
      sample_set_ref - a workspace reference, exposed in metadata as sample_set
             (presumably to the sample set the columns were measured from - confirm)
      unit - string exposed in workspace metadata (presumably the measurement unit of the values - confirm)
      type - string exposed in workspace metadata (accepted values are not defined in this spec)

      Validation:
      @unique data.row_ids
      @unique data.col_ids
      @conditionally_required row_attributemapping_ref row_mapping
      @conditionally_required col_attributemapping_ref col_mapping
      @contains data.row_ids row_mapping
      @contains data.col_ids col_mapping
      @contains values(row_mapping) row_attributemapping_ref:instances
      @contains values(col_mapping) col_attributemapping_ref:instances
      @contains values(biochemistry_mapping) biochemistry_ref:compounds.[*].id

      @optional description row_normalization col_normalization
      @optional col_mapping row_mapping col_attributemapping_ref sample_set_ref
      @optional attributes search_attributes biochemistry_mapping unit type

      @metadata ws scale
      @metadata ws unit
      @metadata ws type
      @metadata ws row_normalization
      @metadata ws col_normalization
      @metadata ws col_attributemapping_ref as col_attribute_mapping
      @metadata ws row_attributemapping_ref as row_attribute_mapping
      @metadata ws sample_set_ref as sample_set
      @metadata ws length(data.row_ids) as compound_count
      @metadata ws length(data.col_ids) as sample_count
    */
    typedef structure {
      string description;
      string scale;
      string row_normalization;
      string col_normalization;
      mapping<string, string> col_mapping;
      ws_attributemapping_id col_attributemapping_ref;
      mapping<string, string> row_mapping;
      ws_attributemapping_id row_attributemapping_ref;
      mapping<string, string> attributes;
      list<string> search_attributes;
      ws_ref biochemistry_ref;
      mapping<string, list<string>> biochemistry_mapping;
      FloatMatrix2D data;
      ws_ref sample_set_ref;
      string unit;
      string type;
    } ChemicalAbundanceMatrix;

    /*
      A wrapper around a FloatMatrix2D designed for matrices of amplicon data. The
      columns represent experimental conditions while the rows correspond to individual
      amplicons.

      KBaseMatrices Fields:
      description - short optional description of the dataset
      scale - raw, ln, log2, log10
      col_normalization - mean_center, median_center, mode_center, zscore
      row_normalization - mean_center, median_center, mode_center, zscore
      col_mapping - map from col_id to an instance id in the AttributeMapping
             referenced by col_attributemapping_ref
      row_mapping - map from row_id to an instance id in the AttributeMapping
             referenced by row_attributemapping_ref
      col_attributemapping_ref - a reference to an AttributeMapping that relates to the columns
      row_attributemapping_ref - a reference to an AttributeMapping that relates to the rows
      attributes - a mapping of additional information pertaining to the object
      search_attributes - a list of object information used by search

      data - contains values for (amplicon,condition) pairs, where
             amplicons correspond to rows and conditions are columns
             (ie data.values[amplicon][condition])

      Additional Fields:
      sample_set_ref - a workspace reference
             (presumably to the sample set the columns were measured from - confirm)
      reads_set_ref - a list of references to the sets of reads libraries that produced this table
      sequence_mapping - map from row_id to the representative sequence for that row
      sequencing_file_handle - handle reference; not listed in any @optional
             annotation below, so it is required for this type

      Sample preparation, sequencing and processing descriptors (strings unless
      noted; their accepted values are not defined in this spec):
      amplicon_type, extraction, amplification, target_gene,
      target_subfragment (list of strings), pcr_primers,
      library_kit, library_layout, library_screening_strategy,
      sequencing_center, sequencing_date, sequencing_technology, sequencing_instrument,
      sequencing_quality_filter_cutoff (int), read_length_cutoff (int), read_pairing,
      barcode_error_rate (float), chimera_detection_and_removal,
      taxon_calling_method (list of strings), denoise_method,
      sequence_error_cutoff (float), clustering_method, clustering_cutoff (float)

      Validation:
      @unique data.row_ids
      @unique data.col_ids
      @conditionally_required row_attributemapping_ref row_mapping
      @conditionally_required col_attributemapping_ref col_mapping
      @contains data.row_ids row_mapping
      @contains data.col_ids col_mapping
      @contains values(row_mapping) row_attributemapping_ref:instances
      @contains values(col_mapping) col_attributemapping_ref:instances

      @optional description row_normalization col_normalization
      @optional col_mapping row_mapping col_attributemapping_ref row_attributemapping_ref sample_set_ref
      @optional attributes search_attributes sequence_mapping reads_set_ref
      @optional amplicon_type extraction amplification
      @optional target_gene target_subfragment pcr_primers
      @optional library_kit library_layout library_screening_strategy
      @optional sequencing_center sequencing_date sequencing_technology sequencing_instrument
      @optional sequencing_quality_filter_cutoff
      @optional read_length_cutoff read_pairing
      @optional barcode_error_rate chimera_detection_and_removal
      @optional taxon_calling_method
      @optional denoise_method sequence_error_cutoff clustering_method clustering_cutoff

      @metadata ws scale
      @metadata ws row_normalization
      @metadata ws col_normalization
      @metadata ws col_attributemapping_ref as col_attribute_mapping
      @metadata ws row_attributemapping_ref as row_attribute_mapping
      @metadata ws length(data.row_ids) as amplicon_count
      @metadata ws length(data.col_ids) as condition_count
      @metadata ws description as additional_information
      @metadata ws amplicon_type
      @metadata ws extraction
      @metadata ws amplification
      @metadata ws target_gene
      @metadata ws length(target_subfragment) as target_subfragment_count
      @metadata ws pcr_primers
      @metadata ws library_kit
      @metadata ws library_layout
      @metadata ws library_screening_strategy
      @metadata ws sequencing_center
      @metadata ws sequencing_date
      @metadata ws sequencing_technology
      @metadata ws sequencing_instrument
      @metadata ws sequencing_quality_filter_cutoff
      @metadata ws read_length_cutoff
      @metadata ws read_pairing
      @metadata ws barcode_error_rate
      @metadata ws chimera_detection_and_removal
      @metadata ws length(taxon_calling_method) as taxon_calling_method_count
      @metadata ws denoise_method
      @metadata ws sequence_error_cutoff
      @metadata ws clustering_method
      @metadata ws clustering_cutoff
    */
    typedef structure {
      string description;
      string scale;
      string row_normalization;
      string col_normalization;
      mapping<string, string> col_mapping;
      ws_attributemapping_id col_attributemapping_ref;
      mapping<string, string> row_mapping;
      ws_attributemapping_id row_attributemapping_ref;
      mapping<string, string> attributes;
      list<string> search_attributes;
      ws_ref sample_set_ref;
      list<ws_ref> reads_set_ref;
      mapping<string, string> sequence_mapping;
      FloatMatrix2D data;
      string amplicon_type;
      string extraction;
      string amplification;
      string target_gene;
      list<string> target_subfragment;
      string pcr_primers;
      string library_kit;
      string library_layout;
      string library_screening_strategy;
      string sequencing_center;
      string sequencing_date;
      string sequencing_technology;
      string sequencing_instrument;
      int sequencing_quality_filter_cutoff;
      int read_length_cutoff;
      string read_pairing;
      float barcode_error_rate;
      string chimera_detection_and_removal;
      list<string> taxon_calling_method;
      string denoise_method;
      float sequence_error_cutoff;
      string clustering_method;
      float clustering_cutoff;
      handle_ref sequencing_file_handle;
    } AmpliconMatrix;
    /*
      A wrapper around a FloatMatrix2D designed for matrices of trait data for use in population
      studies. The columns represent genotypes while the rows correspond to traits.

      KBaseMatrices Fields:
      description - short optional description of the dataset
      scale - raw, ln, log2, log10
      col_normalization - mean_center, median_center, mode_center, zscore
      row_normalization - mean_center, median_center, mode_center, zscore
      col_mapping - map from col_id to an instance id in the AttributeMapping
             referenced by col_attributemapping_ref
      row_mapping - map from row_id to an instance id in the AttributeMapping
             referenced by row_attributemapping_ref
      col_attributemapping_ref - a reference to an AttributeMapping that relates to the columns
             (genotypes); required for this type
      row_attributemapping_ref - a reference to an AttributeMapping that relates to the rows
             (traits); required for this type
      attributes - a mapping of additional information pertaining to the object
      search_attributes - a list of object information used by search

      data - contains values for (trait,genotype) pairs, where
             traits correspond to the rows and genotypes are columns
             (ie data.values[trait][genotype])

      Additional Fields:
      (none)

      Validation:
      @unique data.row_ids
      @unique data.col_ids
      @conditionally_required row_attributemapping_ref row_mapping
      @conditionally_required col_attributemapping_ref col_mapping
      @contains data.row_ids row_mapping
      @contains data.col_ids col_mapping
      @contains values(row_mapping) row_attributemapping_ref:instances
      @contains values(col_mapping) col_attributemapping_ref:instances
      @contains set(trait_id,trait_description) row_attributemapping_ref:attributes.[*].attribute
      @contains set(individual_id,family_id,paternal_id,maternal_id,sex) col_attributemapping_ref:attributes.[*].attribute

      @optional description row_normalization col_normalization
      @optional col_mapping row_mapping
      @optional attributes search_attributes

      @metadata ws scale
      @metadata ws row_normalization
      @metadata ws col_normalization
      @metadata ws col_attributemapping_ref as col_attribute_mapping
      @metadata ws row_attributemapping_ref as row_attribute_mapping
      @metadata ws length(data.row_ids) as trait_count
      @metadata ws length(data.col_ids) as genotype_count
    */
    typedef structure {
      string description;
      string scale;
      string row_normalization;
      string col_normalization;
      mapping<string, string> col_mapping;
      ws_attributemapping_id col_attributemapping_ref;
      mapping<string, string> row_mapping;
      ws_attributemapping_id row_attributemapping_ref;
      mapping<string, string> attributes;
      list<string> search_attributes;
      FloatMatrix2D data;
    } TraitMatrix;
};