Skip to content

Commit c4f2512

Browse files
committed
Add SAM header iterator methods [DRAFT]
1 parent 7060387 commit c4f2512

File tree

2 files changed

+144
-0
lines changed

2 files changed

+144
-0
lines changed

header.c

+68
Original file line numberDiff line numberDiff line change
@@ -1271,6 +1271,22 @@ int sam_hdr_rebuild(sam_hdr_t *bh) {
12711271
return 0;
12721272
}
12731273

1274+
/*
1275+
* Iterators
1276+
*/
1277+
1278+
/*
 * Returns an iterator positioned on the first header line.
 *
 * If bh->hrecs has not been set up yet, sam_hdr_fill_hrecs() is called
 * to create it before the first line is looked up.
 *
 * Returns bh->hrecs->first_line on success;
 *         NULL if bh is NULL, if sam_hdr_fill_hrecs() fails, or if no
 *         hrecs structure is available afterwards.
 */
sam_hdr_line_itr_t *sam_hdr_line_itr_first(sam_hdr_t *bh) {
    if (!bh) return NULL;

    if (!bh->hrecs && sam_hdr_fill_hrecs(bh) != 0) return NULL;

    /* Defensive: never dereference hrecs unless it really exists. */
    return bh->hrecs ? bh->hrecs->first_line : NULL;
}
1284+
1285+
/*
 * Advances an iterator to the following header line.
 *
 * Lines are chained through global_next; arriving back at
 * bh->hrecs->first_line is treated as the end of the iteration.
 *
 * Returns the next line, or NULL when there are no more lines or when
 * bh, bh->hrecs or iter is NULL.
 */
sam_hdr_line_itr_t *sam_hdr_line_itr_next(sam_hdr_t *bh, sam_hdr_line_itr_t *iter) {
    if (!bh || !bh->hrecs || !iter) return NULL;

    iter = iter->global_next;
    return (iter != bh->hrecs->first_line)? iter : NULL;
}
1289+
12741290
/*
12751291
* Appends a formatted line to an existing SAM header.
12761292
* Line is a full SAM header record, eg "@SQ\tSN:foo\tLN:100", with
@@ -1399,6 +1415,15 @@ int sam_hdr_find_line_pos(sam_hdr_t *bh, const char *type,
13991415
return 0;
14001416
}
14011417

1418+
/*
 * Formats the header line referenced by iter into ks via
 * build_header_line(). sam.h documents this as appending to whatever
 * ks already holds.
 *
 * Returns 0 on success;
 *        -1 if iter is NULL;
 *        -2 if ks is NULL or build_header_line() reports a failure.
 */
int sam_hdr_find_line_iter_append(const sam_hdr_line_itr_t *iter, kstring_t *ks) {
    if (!iter) return -1;
    if (!ks) return -2;

    return build_header_line(iter, ks) < 0 ? -2 : 0;
}
1424+
1425+
/* ==== Key:val level methods ==== */
1426+
14021427
/*
14031428
* Remove a line from the header by specifying a tag:value that uniquely
14041429
* identifies a line, i.e. the @SQ line containing "SN:ref1".
@@ -1481,6 +1506,26 @@ int sam_hdr_remove_line_pos(sam_hdr_t *bh, const char *type, int position) {
14811506
return ret;
14821507
}
14831508

1509+
/*
1510+
* Remove a line from the header via an iterator.
1511+
*/
1512+
sam_hdr_line_itr_t *sam_hdr_remove_line_iter(sam_hdr_t *bh, sam_hdr_line_itr_t *iter) {
1513+
if (!bh || !iter) return NULL;
1514+
1515+
if (iter->type == TYPEKEY("PG")) {
1516+
hts_log_warning("Removing PG lines is not supported!");
1517+
return NULL;
1518+
}
1519+
1520+
sam_hdr_line_itr_t *next = sam_hdr_line_itr_next(bh, iter);
1521+
char type[2] = { iter->type >> 8, iter->type & 0xff };
1522+
if (sam_hrecs_remove_line(bh->hrecs, type, iter) < 0) return NULL;
1523+
1524+
if (bh->hrecs->refs_changed >= 0 && rebuild_target_arrays(bh) != 0) return NULL;
1525+
if (bh->hrecs->dirty) redact_header_text(bh);
1526+
return next;
1527+
}
1528+
14841529
/*
14851530
* Check if sam_hdr_update_line() is being used to change the name of
14861531
* a record, and if the new name is going to clash with an existing one.
@@ -1920,6 +1965,19 @@ int sam_hdr_find_tag_pos(sam_hdr_t *bh,
19201965
return 0;
19211966
}
19221967

1968+
int sam_hdr_find_tag_iter(sam_hdr_line_itr_t *iter,
1969+
const char *key,
1970+
kstring_t *ks) {
1971+
if (!iter || !key) return -2;
1972+
1973+
sam_hrec_tag_t *tag = sam_hrecs_find_key(iter, key, NULL);
1974+
if (!tag || tag->len < 3) return -1;
1975+
1976+
ks_clear(ks);
1977+
if (kputsn(&tag->str[3], tag->len-3, ks) < 0) return -2;
1978+
return 0;
1979+
}
1980+
19231981
int sam_hdr_remove_tag_id(sam_hdr_t *bh,
19241982
const char *type,
19251983
const char *ID_key,
@@ -1946,6 +2004,16 @@ int sam_hdr_remove_tag_id(sam_hdr_t *bh,
19462004
return ret;
19472005
}
19482006

2007+
int sam_hdr_remove_tag_iter(sam_hdr_t *bh,
2008+
sam_hdr_line_itr_t *iter,
2009+
const char *key) {
2010+
if (!bh || !iter || !key) return -1;
2011+
2012+
int ret = sam_hrecs_remove_key(bh->hrecs, iter, key);
2013+
if (ret == 0 && bh->hrecs->dirty) redact_header_text(bh);
2014+
return ret;
2015+
}
2016+
19492017
/*
19502018
* Reconstructs a kstring from the header hash table.
19512019
* Returns 0 on success

htslib/sam.h

+76
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@ DEALINGS IN THE SOFTWARE. */
3737
extern "C" {
3838
#endif
3939

40+
struct sam_hrec_type_s;
41+
4042
/// Highest SAM format version supported by this library
4143
#define SAM_FORMAT_VERSION "1.6"
4244

@@ -456,6 +458,18 @@ const char *sam_hdr_str(sam_hdr_t *h);
456458
HTSLIB_EXPORT
457459
int sam_hdr_nref(const sam_hdr_t *h);
458460

461+
/* ==== Iterator methods ==== */
462+
463+
typedef struct sam_hrec_type_s sam_hdr_line_itr_t;
464+
465+
/// Get iterator pointing to the first header line
466+
HTSLIB_EXPORT
467+
sam_hdr_line_itr_t *sam_hdr_line_itr_first(sam_hdr_t *h);
468+
469+
/// Increment iterator to point to the next header line (NULL once the last line has been passed)
470+
HTSLIB_EXPORT
471+
sam_hdr_line_itr_t *sam_hdr_line_itr_next(sam_hdr_t *h, sam_hdr_line_itr_t *iter);
472+
459473
/* ==== Line level methods ==== */
460474

461475
/// Add formatted lines to an existing header.
@@ -528,6 +542,36 @@ HTSLIB_EXPORT
528542
int sam_hdr_find_line_pos(sam_hdr_t *h, const char *type,
529543
int pos, kstring_t *ks);
530544

545+
/// Returns a complete line of formatted text for the line pointed to.
546+
/*!
547+
* @param iter Iterator pointing to a header line
548+
* @param ks kstring to hold the result
549+
* @return 0 on success;
550+
* -1 if @p iter is NULL (i.e. does not point to a header line)
551+
* -2 on other failures
552+
*
553+
* Puts a complete line of formatted text for a specific line into @p ks.
554+
* Appends the text to the existing content in @p ks, if any.
555+
*/
556+
HTSLIB_EXPORT
557+
int sam_hdr_find_line_iter_append(const sam_hdr_line_itr_t *iter, kstring_t *ks);
558+
559+
/// Returns a complete line of formatted text for the line pointed to.
560+
/*!
561+
* @param iter Iterator pointing to a header line
562+
* @param ks kstring to hold the result
563+
* @return 0 on success;
564+
* -1 if @p iter is NULL (i.e. does not point to a header line)
565+
* -2 on other failures
566+
*
567+
* Puts a complete line of formatted text for a specific line into @p ks.
568+
* Any existing content in @p ks will be overwritten.
569+
*/
570+
static inline int sam_hdr_find_line_iter(const sam_hdr_line_itr_t *iter, kstring_t *ks)
{
    /* Empty the destination first, then reuse the appending variant. */
    kstring_t *emptied = ks_clear(ks);
    return sam_hdr_find_line_iter_append(iter, emptied);
}
574+
531575
/// Remove a line with given type / id from a header
532576
/*!
533577
* @param type Type of the searched line. Eg. "SQ"
@@ -564,6 +608,14 @@ int sam_hdr_remove_line_id(sam_hdr_t *h, const char *type, const char *ID_key, c
564608
HTSLIB_EXPORT
565609
int sam_hdr_remove_line_pos(sam_hdr_t *h, const char *type, int position);
566610

611+
/// Remove line pointed to by iterator from a header
612+
/*!
613+
* @param iter Iterator pointing to a header line
614+
* @return An iterator pointing to the following line; NULL on error, and also when the removed line was the last one (no following line)
615+
*/
616+
HTSLIB_EXPORT
617+
sam_hdr_line_itr_t *sam_hdr_remove_line_iter(sam_hdr_t *h, sam_hdr_line_itr_t *iter);
618+
567619
/// Add or update tag key,value pairs in a header line.
568620
/*!
569621
* @param type Type of the searched line. Eg. "SQ"
@@ -716,6 +768,21 @@ int sam_hdr_find_tag_id(sam_hdr_t *h, const char *type, const char *ID_key, cons
716768
HTSLIB_EXPORT
717769
int sam_hdr_find_tag_pos(sam_hdr_t *h, const char *type, int pos, const char *key, kstring_t *ks);
718770

771+
/// Return the value associated with a key for a header line identified by iterator
772+
/*!
773+
* @param iter Iterator pointing to a header line
774+
* @param key Key of the searched tag. Eg. "LN"
775+
* @param ks kstring where the value will be written
776+
* @return 0 on success
777+
* -1 if the requested tag does not exist
778+
* -2 on other errors
779+
*
780+
* Looks for a specific key in the SAM header line pointed to by @p iter and writes the
781+
* associated value into @p ks. Any pre-existing content in @p ks will be overwritten.
782+
*/
783+
HTSLIB_EXPORT
784+
int sam_hdr_find_tag_iter(sam_hdr_line_itr_t *iter, const char *key, kstring_t *ks);
785+
719786
/// Remove the key from the line identified by type, ID_key and ID_value.
720787
/*!
721788
* @param type Type of the line to which the tag belongs. Eg. "SQ"
@@ -727,6 +794,15 @@ int sam_hdr_find_tag_pos(sam_hdr_t *h, const char *type, int pos, const char *ke
727794
HTSLIB_EXPORT
728795
int sam_hdr_remove_tag_id(sam_hdr_t *h, const char *type, const char *ID_key, const char *ID_value, const char *key);
729796

797+
/// Remove the key from the line pointed to by the iterator.
798+
/*!
799+
* @param iter Iterator pointing to a header line
800+
* @param key Key of the targeted tag. Eg. "M5"
801+
* @return 1 if the key was removed; 0 if it was not present; -1 on error
802+
*/
803+
HTSLIB_EXPORT
804+
int sam_hdr_remove_tag_iter(sam_hdr_t *h, sam_hdr_line_itr_t *iter, const char *key);
805+
730806
/// Get the target id for a given reference sequence name
731807
/*!
732808
* @param ref Reference name

0 commit comments

Comments
 (0)