Skip to content

Commit 24406ca

Browse files
committed
Add SAM header iterator methods [DRAFT]
1 parent 2f2318e commit 24406ca

File tree

3 files changed

+145
-2
lines changed

3 files changed

+145
-2
lines changed

header.c

+64-1
Original file line numberDiff line numberDiff line change
@@ -1271,6 +1271,18 @@ int sam_hdr_rebuild(sam_hdr_t *bh) {
12711271
return 0;
12721272
}
12731273

1274+
/*
 * Returns an iterator pointing to the first line of the header.
 *
 * If the parsed header records (bh->hrecs) are not present yet they are
 * built on demand with sam_hdr_fill_hrecs().
 *
 * Returns whatever hrecs->first_line holds (may be NULL - presumably for a
 *         header without any lines; TODO confirm);
 *         NULL if bh is NULL or the header records could not be built.
 */
sam_hdr_line_t *sam_hdr_first_line(sam_hdr_t *bh) {
    if (!bh) return NULL;

    if (!bh->hrecs) {
        if (sam_hdr_fill_hrecs(bh) != 0) return NULL;
    }
    return bh->hrecs->first_line;
}
1280+
1281+
/*
 * Advances a header line iterator.
 *
 * Lines are chained through their global_next pointers; arriving back at
 * hrecs->first_line marks the end of the iteration.
 *
 * Returns the line following 'line';
 *         NULL if there is no further line, or if bh, its parsed header
 *         records, or line is NULL.
 */
sam_hdr_line_t *sam_hdr_next_line(sam_hdr_t *bh, sam_hdr_line_t *line) {
    /* A NULL iterator (e.g. the end-of-list value returned by a previous
       call) must not be dereferenced. */
    if (!bh || !bh->hrecs || !line) return NULL;

    line = line->global_next;
    return (line != bh->hrecs->first_line)? line : NULL;
}
1285+
12741286
/*
12751287
* Appends a formatted line to an existing SAM header.
12761288
* Line is a full SAM header record, eg "@SQ\tSN:foo\tLN:100", with
@@ -1346,6 +1358,13 @@ int sam_hdr_add_line(sam_hdr_t *bh, const char *type, ...) {
13461358
return ret;
13471359
}
13481360

1361+
/*
 * Appends the formatted text of one header line to ks.
 *
 * Returns  0 on success;
 *         -1 if line is NULL;
 *         -2 if build_header_line() reports a failure.
 */
int sam_hdr_format_line_append(const sam_hdr_line_t *line, kstring_t *ks) {
    if (line == NULL)
        return -1;

    return (build_header_line(line, ks) < 0) ? -2 : 0;
}
1367+
13491368
/*
13501369
* Returns a complete line of formatted text for a specific head type/ID
13511370
* combination. If ID_key is NULL then it returns the first line of the specified
@@ -1399,6 +1418,27 @@ int sam_hdr_find_line_pos(sam_hdr_t *bh, const char *type,
13991418
return 0;
14001419
}
14011420

1421+
/*
1422+
* Remove a line from the header via an iterator.
1423+
*/
1424+
1425+
sam_hdr_line_t *sam_hdr_remove_line(sam_hdr_t *bh, sam_hdr_line_t *line) {
1426+
if (!bh || !line) return NULL;
1427+
1428+
if (line->type == TYPEKEY("PG")) {
1429+
hts_log_warning("Removing PG lines is not supported!");
1430+
return NULL;
1431+
}
1432+
1433+
sam_hdr_line_t *next = sam_hdr_next_line(bh, line);
1434+
char type[2] = { line->type >> 8, line->type & 0xff };
1435+
if (sam_hrecs_remove_line(bh->hrecs, type, line) < 0) return NULL;
1436+
1437+
if (bh->hrecs->refs_changed >= 0 && rebuild_target_arrays(bh) != 0) return NULL;
1438+
if (bh->hrecs->dirty) redact_header_text(bh);
1439+
return next;
1440+
}
1441+
14021442
/*
14031443
* Remove a line from the header by specifying a tag:value that uniquely
14041444
* identifies a line, i.e. the @SQ line containing "SN:ref1".
@@ -1857,6 +1897,19 @@ const char *sam_hdr_line_name(sam_hdr_t *bh,
18571897

18581898
/* ==== Key:val level methods ==== */
18591899

1900+
int sam_hdr_find_tag(const sam_hdr_line_t *line,
1901+
const char *key,
1902+
kstring_t *ks) {
1903+
if (!line || !key) return -2;
1904+
1905+
sam_hrec_tag_t *tag = sam_hrecs_find_key(line, key, NULL);
1906+
if (!tag || tag->len < 3) return -1;
1907+
1908+
ks_clear(ks);
1909+
if (kputsn(&tag->str[3], tag->len-3, ks) < 0) return -2;
1910+
return 0;
1911+
}
1912+
18601913
int sam_hdr_find_tag_id(sam_hdr_t *bh,
18611914
const char *type,
18621915
const char *ID_key,
@@ -1889,6 +1942,16 @@ int sam_hdr_find_tag_id(sam_hdr_t *bh,
18891942
return 0;
18901943
}
18911944

1945+
int sam_hdr_remove_tag(sam_hdr_t *bh,
1946+
sam_hdr_line_t *line,
1947+
const char *key) {
1948+
if (!bh || !line || !key) return -1;
1949+
1950+
int ret = sam_hrecs_remove_key(bh->hrecs, line, key);
1951+
if (ret == 0 && bh->hrecs->dirty) redact_header_text(bh);
1952+
return ret;
1953+
}
1954+
18921955
int sam_hdr_find_tag_pos(sam_hdr_t *bh,
18931956
const char *type,
18941957
int pos,
@@ -2572,7 +2635,7 @@ static int sam_hrecs_update(sam_hrecs_t *hrecs, sam_hrec_type_t *type, ...) {
25722635
* Returns the tag pointer on success
25732636
* NULL on failure
25742637
*/
2575-
sam_hrec_tag_t *sam_hrecs_find_key(sam_hrec_type_t *type,
2638+
sam_hrec_tag_t *sam_hrecs_find_key(const sam_hrec_type_t *type,
25762639
const char *key,
25772640
sam_hrec_tag_t **prev) {
25782641
sam_hrec_tag_t *tag, *p = NULL;

header.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -290,7 +290,7 @@ void sam_hrecs_free(sam_hrecs_t *hrecs);
290290
sam_hrec_type_t *sam_hrecs_find_type_id(sam_hrecs_t *hrecs, const char *type,
291291
const char *ID_key, const char *ID_value);
292292

293-
sam_hrec_tag_t *sam_hrecs_find_key(sam_hrec_type_t *type,
293+
sam_hrec_tag_t *sam_hrecs_find_key(const sam_hrec_type_t *type,
294294
const char *key,
295295
sam_hrec_tag_t **prev);
296296

htslib/sam.h

+80
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@ DEALINGS IN THE SOFTWARE. */
3737
extern "C" {
3838
#endif
3939

40+
struct sam_hrec_type_s;
41+
4042
/// Highest SAM format version supported by this library
4143
#define SAM_FORMAT_VERSION "1.6"
4244

@@ -458,6 +460,22 @@ int sam_hdr_nref(const sam_hdr_t *h);
458460

459461
/* ==== Line level methods ==== */
460462

463+
/*! @typedef
464+
* @abstract Opaque type used as an iterator over header lines.
465+
*/
466+
typedef struct sam_hrec_type_s sam_hdr_line_t;
467+
468+
/// Return an iterator pointing to the first header line
469+
HTSLIB_EXPORT
470+
sam_hdr_line_t *sam_hdr_first_line(sam_hdr_t *h);
471+
472+
/// Return an iterator pointing to the next header line
473+
/*!
474+
* @return An iterator pointing to the next line, or NULL if there is none.
475+
*/
476+
HTSLIB_EXPORT
477+
sam_hdr_line_t *sam_hdr_next_line(sam_hdr_t *h, sam_hdr_line_t *line);
478+
461479
/// Add formatted lines to an existing header.
462480
/*!
463481
* @param lines Full SAM header record, eg "@SQ\tSN:foo\tLN:100", with
@@ -490,6 +508,36 @@ int sam_hdr_add_lines(sam_hdr_t *h, const char *lines, size_t len);
490508
HTSLIB_EXPORT
491509
int sam_hdr_add_line(sam_hdr_t *h, const char *type, ...);
492510

511+
/// Returns a complete line of formatted text for the line pointed to.
512+
/*!
513+
* @param line Iterator pointing to a header line
514+
* @param ks kstring to which to append the result
515+
* @return 0 on success;
516+
* -1 if @p line does not point to a header line
517+
* -2 on other failures
518+
*
519+
* Puts a complete line of formatted text for a specific line into @p ks.
520+
* Appends the text to the existing content in @p ks, if any.
521+
*/
522+
HTSLIB_EXPORT
523+
int sam_hdr_format_line_append(const sam_hdr_line_t *line, kstring_t *ks);
524+
525+
/// Returns a complete line of formatted text for the line pointed to.
526+
/*!
527+
* @param line Iterator pointing to a header line
528+
* @param ks kstring to hold the result
529+
* @return 0 on success;
530+
* -1 if @p line does not point to a header line
531+
* -2 on other failures
532+
*
533+
* Puts a complete line of formatted text for a specific line into @p ks.
534+
* Any existing content in @p ks will be overwritten.
535+
*/
536+
static inline int sam_hdr_format_line(const sam_hdr_line_t *line, kstring_t *ks)
{
    /* Reset the kstring first so the formatted line replaces, rather than
       extends, whatever it held before; then reuse the append variant. */
    kstring_t *emptied = ks_clear(ks);

    return sam_hdr_format_line_append(line, emptied);
}
540+
493541
/// Returns a complete line of formatted text for a given type and ID.
494542
/*!
495543
* @param type Type of the searched line. Eg. "SQ"
@@ -528,6 +576,14 @@ HTSLIB_EXPORT
528576
int sam_hdr_find_line_pos(sam_hdr_t *h, const char *type,
529577
int pos, kstring_t *ks);
530578

579+
/// Remove line pointed to by iterator from a header
580+
/*!
581+
* @param line Iterator pointing to a header line
582+
* @return An iterator pointing to the following line, or NULL on error or if it was the last line (FIXME: these two cases cannot currently be distinguished)
583+
*/
584+
HTSLIB_EXPORT
585+
sam_hdr_line_t *sam_hdr_remove_line(sam_hdr_t *h, sam_hdr_line_t *line);
586+
531587
/// Remove a line with given type / id from a header
532588
/*!
533589
* @param type Type of the searched line. Eg. "SQ"
@@ -679,6 +735,21 @@ const char *sam_hdr_line_name(sam_hdr_t *bh, const char *type, int pos);
679735

680736
/* ==== Key:val level methods ==== */
681737

738+
/// Return the value associated with a key for a header line identified by iterator
739+
/*!
740+
* @param line Iterator pointing to a header line
741+
* @param key Key of the searched tag. Eg. "LN"
742+
* @param ks kstring where the value will be written
743+
* @return 0 on success
744+
* -1 if the requested tag does not exist
745+
* -2 on other errors
746+
*
747+
* Looks for a specific key in the SAM header line pointed to by @p line and writes the
748+
* associated value into @p ks. Any pre-existing content in @p ks will be overwritten.
749+
*/
750+
HTSLIB_EXPORT
751+
int sam_hdr_find_tag(const sam_hdr_line_t *line, const char *key, kstring_t *ks);
752+
682753
/// Return the value associated with a key for a header line identified by ID_key:ID_val
683754
/*!
684755
* @param type Type of the line to which the tag belongs. Eg. "SQ"
@@ -716,6 +787,15 @@ int sam_hdr_find_tag_id(sam_hdr_t *h, const char *type, const char *ID_key, cons
716787
HTSLIB_EXPORT
717788
int sam_hdr_find_tag_pos(sam_hdr_t *h, const char *type, int pos, const char *key, kstring_t *ks);
718789

790+
/// Remove the key from the line pointed to by the iterator.
791+
/*!
792+
* @param line Iterator pointing to a header line
793+
* @param key Key of the targeted tag. Eg. "M5"
794+
* @return 1 if the key was removed; 0 if it was not present; -1 on error
795+
*/
796+
HTSLIB_EXPORT
797+
int sam_hdr_remove_tag(sam_hdr_t *h, sam_hdr_line_t *line, const char *key);
798+
719799
/// Remove the key from the line identified by type, ID_key and ID_value.
720800
/*!
721801
* @param type Type of the line to which the tag belongs. Eg. "SQ"

0 commit comments

Comments
 (0)