Skip to content

Commit 420e763

Browse files
committed
Add SAM header iterator methods [DRAFT]
1 parent 4dc620e commit 420e763

File tree

3 files changed

+145
-2
lines changed

3 files changed

+145
-2
lines changed

header.c

+64-1
Original file line numberDiff line numberDiff line change
@@ -1271,6 +1271,18 @@ int sam_hdr_rebuild(sam_hdr_t *bh) {
12711271
return 0;
12721272
}
12731273

1274+
/*
 * Returns an iterator pointing to the first line of the header.
 *
 * If bh->hrecs has not been set up yet, sam_hdr_fill_hrecs() is called to
 * populate it before the first line is looked up.
 *
 * Returns bh->hrecs->first_line on success;
 *         NULL if bh is NULL, if sam_hdr_fill_hrecs() fails, or if no
 *         header records are available afterwards.
 */
sam_hdr_line_t *sam_hdr_first_line(sam_hdr_t *bh) {
    if (!bh) return NULL;

    if (!bh->hrecs && sam_hdr_fill_hrecs(bh) != 0) return NULL;

    /* Be defensive: do not assume a successful fill left hrecs non-NULL. */
    return bh->hrecs ? bh->hrecs->first_line : NULL;
}
1280+
1281+
/*
 * Advances a header line iterator.
 *
 * Follows line->global_next.  Arriving back at bh->hrecs->first_line is
 * treated as the end of the iteration.
 *
 * Returns the next header line;
 *         NULL if there is no further line, or if bh, bh->hrecs or line
 *         is NULL (so passing the NULL returned at the end of a previous
 *         iteration step is harmless).
 */
sam_hdr_line_t *sam_hdr_next_line(sam_hdr_t *bh, sam_hdr_line_t *line) {
    if (!bh || !bh->hrecs || !line) return NULL;

    line = line->global_next;
    return (line != bh->hrecs->first_line)? line : NULL;
}
1285+
12741286
/*
12751287
* Appends a formatted line to an existing SAM header.
12761288
* Line is a full SAM header record, eg "@SQ\tSN:foo\tLN:100", with
@@ -1346,6 +1358,13 @@ int sam_hdr_add_line(sam_hdr_t *bh, const char *type, ...) {
13461358
return ret;
13471359
}
13481360

1361+
/*
 * Appends the formatted text of the header line pointed to by line to ks.
 *
 * Returns 0 on success;
 *        -1 if line is NULL;
 *        -2 if build_header_line() reports a failure.
 */
int sam_hdr_format_line_append(const sam_hdr_line_t *line, kstring_t *ks) {
    int status;

    if (line == NULL)
        return -1;

    status = build_header_line(line, ks);
    return (status < 0) ? -2 : 0;
}
1367+
13491368
/*
13501369
* Returns a complete line of formatted text for a specific head type/ID
13511370
* combination. If ID_key is NULL then it returns the first line of the specified
@@ -1399,6 +1418,27 @@ int sam_hdr_find_line_pos(sam_hdr_t *bh, const char *type,
13991418
return 0;
14001419
}
14011420

1421+
/*
1422+
* Remove a line from the header via an iterator.
1423+
*/
1424+
1425+
sam_hdr_line_t *sam_hdr_remove_line(sam_hdr_t *bh, sam_hdr_line_t *line) {
1426+
if (!bh || !line) return NULL;
1427+
1428+
if (line->type == TYPEKEY("PG")) {
1429+
hts_log_warning("Removing PG lines is not supported!");
1430+
return NULL;
1431+
}
1432+
1433+
sam_hdr_line_t *next = sam_hdr_next_line(bh, line);
1434+
char type[2] = { line->type >> 8, line->type & 0xff };
1435+
if (sam_hrecs_remove_line(bh->hrecs, type, line, 1) < 0) return NULL;
1436+
1437+
if (bh->hrecs->refs_changed >= 0 && rebuild_target_arrays(bh) != 0) return NULL;
1438+
if (bh->hrecs->dirty) redact_header_text(bh);
1439+
return next;
1440+
}
1441+
14021442
/*
14031443
* Remove a line from the header by specifying a tag:value that uniquely
14041444
* identifies a line, i.e. the @SQ line containing "SN:ref1".
@@ -1908,6 +1948,19 @@ const char *sam_hdr_line_name(sam_hdr_t *bh,
19081948

19091949
/* ==== Key:val level methods ==== */
19101950

1951+
int sam_hdr_find_tag(const sam_hdr_line_t *line,
1952+
const char *key,
1953+
kstring_t *ks) {
1954+
if (!line || !key) return -2;
1955+
1956+
sam_hrec_tag_t *tag = sam_hrecs_find_key(line, key, NULL);
1957+
if (!tag || tag->len < 3) return -1;
1958+
1959+
ks_clear(ks);
1960+
if (kputsn(&tag->str[3], tag->len-3, ks) < 0) return -2;
1961+
return 0;
1962+
}
1963+
19111964
int sam_hdr_find_tag_id(sam_hdr_t *bh,
19121965
const char *type,
19131966
const char *ID_key,
@@ -1940,6 +1993,16 @@ int sam_hdr_find_tag_id(sam_hdr_t *bh,
19401993
return 0;
19411994
}
19421995

1996+
int sam_hdr_remove_tag(sam_hdr_t *bh,
1997+
sam_hdr_line_t *line,
1998+
const char *key) {
1999+
if (!bh || !line || !key) return -1;
2000+
2001+
int ret = sam_hrecs_remove_key(bh->hrecs, line, key);
2002+
if (ret == 0 && bh->hrecs->dirty) redact_header_text(bh);
2003+
return ret;
2004+
}
2005+
19432006
int sam_hdr_find_tag_pos(sam_hdr_t *bh,
19442007
const char *type,
19452008
int pos,
@@ -2635,7 +2698,7 @@ static int sam_hrecs_update(sam_hrecs_t *hrecs, sam_hrec_type_t *type, ...) {
26352698
* Returns the tag pointer on success
26362699
* NULL on failure
26372700
*/
2638-
sam_hrec_tag_t *sam_hrecs_find_key(sam_hrec_type_t *type,
2701+
sam_hrec_tag_t *sam_hrecs_find_key(const sam_hrec_type_t *type,
26392702
const char *key,
26402703
sam_hrec_tag_t **prev) {
26412704
sam_hrec_tag_t *tag, *p = NULL;

header.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -290,7 +290,7 @@ void sam_hrecs_free(sam_hrecs_t *hrecs);
290290
sam_hrec_type_t *sam_hrecs_find_type_id(sam_hrecs_t *hrecs, const char *type,
291291
const char *ID_key, const char *ID_value);
292292

293-
sam_hrec_tag_t *sam_hrecs_find_key(sam_hrec_type_t *type,
293+
sam_hrec_tag_t *sam_hrecs_find_key(const sam_hrec_type_t *type,
294294
const char *key,
295295
sam_hrec_tag_t **prev);
296296

htslib/sam.h

+80
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ DEALINGS IN THE SOFTWARE. */
4444
extern "C" {
4545
#endif
4646

47+
struct sam_hrec_type_s;
48+
4749
/// Highest SAM format version supported by this library
4850
#define SAM_FORMAT_VERSION "1.6"
4951

@@ -466,6 +468,22 @@ int sam_hdr_nref(const sam_hdr_t *h);
466468

467469
/* ==== Line level methods ==== */
468470

471+
/*! @typedef
472+
* @abstract Opaque type used as an iterator over header lines.
473+
*/
474+
typedef struct sam_hrec_type_s sam_hdr_line_t;
475+
476+
/// Return an iterator pointing to the first header line
477+
HTSLIB_EXPORT
478+
sam_hdr_line_t *sam_hdr_first_line(sam_hdr_t *h);
479+
480+
/// Return an iterator pointing to the next header line
481+
/*!
482+
* @return An iterator pointing to the next line, or NULL if there is none.
483+
*/
484+
HTSLIB_EXPORT
485+
sam_hdr_line_t *sam_hdr_next_line(sam_hdr_t *h, sam_hdr_line_t *line);
486+
469487
/// Add formatted lines to an existing header.
470488
/*!
471489
* @param lines Full SAM header record, eg "@SQ\tSN:foo\tLN:100", with
@@ -498,6 +516,36 @@ int sam_hdr_add_lines(sam_hdr_t *h, const char *lines, size_t len);
498516
HTSLIB_EXPORT
499517
int sam_hdr_add_line(sam_hdr_t *h, const char *type, ...);
500518

519+
/// Returns a complete line of formatted text for the line pointed to.
520+
/*!
521+
* @param line Iterator pointing to a header line
522+
* @param ks kstring to which to append the result
523+
* @return 0 on success;
524+
* -1 if @p line does not point to a header line
525+
* -2 on other failures
526+
*
527+
* Puts a complete line of formatted text for a specific line into @p ks.
528+
* Appends the text to the existing content in @p ks, if any.
529+
*/
530+
HTSLIB_EXPORT
531+
int sam_hdr_format_line_append(const sam_hdr_line_t *line, kstring_t *ks);
532+
533+
/// Returns a complete line of formatted text for the line pointed to.
534+
/*!
535+
* @param line Iterator pointing to a header line
536+
* @param ks kstring to hold the result
537+
* @return 0 on success;
538+
* -1 if @p line does not point to a header line
539+
* -2 on other failures
540+
*
541+
* Puts a complete line of formatted text for a specific line into @p ks.
542+
* Any existing content in @p ks will be overwritten.
543+
*/
544+
static inline int sam_hdr_format_line(const sam_hdr_line_t *line, kstring_t *ks)
{
    /* Empty ks first so the formatted line replaces any previous content. */
    kstring_t *emptied = ks_clear(ks);

    return sam_hdr_format_line_append(line, emptied);
}
548+
501549
/// Returns a complete line of formatted text for a given type and ID.
502550
/*!
503551
* @param type Type of the searched line. Eg. "SQ"
@@ -536,6 +584,14 @@ HTSLIB_EXPORT
536584
int sam_hdr_find_line_pos(sam_hdr_t *h, const char *type,
537585
int pos, kstring_t *ks);
538586

587+
/// Remove line pointed to by iterator from a header
588+
/*!
589+
* @param line Iterator pointing to a header line
590+
* @return An iterator pointing to the following line, or NULL if the removed line was the last one or if an error occurred (FIXME: callers cannot tell these two cases apart)
591+
*/
592+
HTSLIB_EXPORT
593+
sam_hdr_line_t *sam_hdr_remove_line(sam_hdr_t *h, sam_hdr_line_t *line);
594+
539595
/// Remove a line with given type / id from a header
540596
/*!
541597
* @param type Type of the searched line. Eg. "SQ"
@@ -687,6 +743,21 @@ const char *sam_hdr_line_name(sam_hdr_t *bh, const char *type, int pos);
687743

688744
/* ==== Key:val level methods ==== */
689745

746+
/// Return the value associated with a key for a header line identified by iterator
747+
/*!
748+
* @param line Iterator pointing to a header line
749+
* @param key Key of the searched tag. Eg. "LN"
750+
* @param ks kstring where the value will be written
751+
* @return 0 on success
752+
* -1 if the requested tag does not exist
753+
* -2 on other errors
754+
*
755+
* Looks for a specific key in the SAM header line pointed to by @p line and writes the
756+
* associated value into @p ks. Any pre-existing content in @p ks will be overwritten.
757+
*/
758+
HTSLIB_EXPORT
759+
int sam_hdr_find_tag(const sam_hdr_line_t *line, const char *key, kstring_t *ks);
760+
690761
/// Return the value associated with a key for a header line identified by ID_key:ID_val
691762
/*!
692763
* @param type Type of the line to which the tag belongs. Eg. "SQ"
@@ -724,6 +795,15 @@ int sam_hdr_find_tag_id(sam_hdr_t *h, const char *type, const char *ID_key, cons
724795
HTSLIB_EXPORT
725796
int sam_hdr_find_tag_pos(sam_hdr_t *h, const char *type, int pos, const char *key, kstring_t *ks);
726797

798+
/// Remove the key from the line pointed to by the iterator.
799+
/*!
800+
* @param line Iterator pointing to a header line
801+
* @param key Key of the targeted tag. Eg. "M5"
802+
* @return 1 if the key was removed; 0 if it was not present; -1 on error
803+
*/
804+
HTSLIB_EXPORT
805+
int sam_hdr_remove_tag(sam_hdr_t *h, sam_hdr_line_t *line, const char *key);
806+
727807
/// Remove the key from the line identified by type, ID_key and ID_value.
728808
/*!
729809
* @param type Type of the line to which the tag belongs. Eg. "SQ"

0 commit comments

Comments
 (0)