Skip to content

Commit a9d1c53

Browse files
committed
Add SAM header iterator methods [DRAFT]
1 parent 7060387 commit a9d1c53

File tree

2 files changed

+142
-0
lines changed

2 files changed

+142
-0
lines changed

header.c

+68
Original file line numberDiff line numberDiff line change
@@ -1271,6 +1271,22 @@ int sam_hdr_rebuild(sam_hdr_t *bh) {
12711271
return 0;
12721272
}
12731273

1274+
/*
1275+
* Iterators
1276+
*/
1277+
1278+
/*
 * Returns an iterator positioned on the first line of the header.
 *
 * If the parsed header records (bh->hrecs) are not present yet, they are
 * built on demand with sam_hdr_fill_hrecs().
 *
 * Returns the value of bh->hrecs->first_line on success (presumably NULL
 *         for a header without lines - TODO confirm);
 *         NULL if bh is NULL or the header records could not be built.
 */
sam_hdr_line_itr_t *sam_hdr_line_itr_first(sam_hdr_t *bh) {
    if (!bh)
        return NULL;

    if (!bh->hrecs && sam_hdr_fill_hrecs(bh) != 0)
        return NULL;

    /* Guard against a fill that reported success but left hrecs unset. */
    return bh->hrecs ? bh->hrecs->first_line : NULL;
}
1284+
1285+
/*
 * Advances an iterator to the following header line.
 *
 * Lines are chained through their global_next pointers; the walk is over
 * once global_next leads back to bh->hrecs->first_line.
 *
 * Returns the next line;
 *         NULL when iter was the last line, or when bh, bh->hrecs or iter
 *         is NULL (so calling this on an exhausted iterator is harmless).
 */
sam_hdr_line_itr_t *sam_hdr_line_itr_next(sam_hdr_t *bh, sam_hdr_line_itr_t *iter) {
    if (!bh || !bh->hrecs || !iter)
        return NULL;

    sam_hdr_line_itr_t *next = iter->global_next;
    return (next != bh->hrecs->first_line) ? next : NULL;
}
1289+
12741290
/*
12751291
* Appends a formatted line to an existing SAM header.
12761292
* Line is a full SAM header record, eg "@SQ\tSN:foo\tLN:100", with
@@ -1399,6 +1415,15 @@ int sam_hdr_find_line_pos(sam_hdr_t *bh, const char *type,
13991415
return 0;
14001416
}
14011417

1418+
/*
 * Formats the header line referenced by iter via build_header_line(),
 * passing ks as the destination.
 *
 * Returns 0 on success;
 *        -1 if iter is NULL;
 *        -2 if build_header_line() reports a failure.
 */
int sam_hdr_find_line_iter_append(const sam_hdr_line_itr_t *iter, kstring_t *ks) {
    if (iter == NULL)
        return -1;

    return (build_header_line(iter, ks) < 0) ? -2 : 0;
}
1424+
1425+
/* ==== Key:val level methods ==== */
1426+
14021427
/*
14031428
* Remove a line from the header by specifying a tag:value that uniquely
14041429
* identifies a line, i.e. the @SQ line containing "SN:ref1".
@@ -1481,6 +1506,26 @@ int sam_hdr_remove_line_pos(sam_hdr_t *bh, const char *type, int position) {
14811506
return ret;
14821507
}
14831508

1509+
/*
1510+
* Remove a line from the header via an iterator.
1511+
*/
1512+
sam_hdr_line_itr_t *sam_hdr_remove_line_iter(sam_hdr_t *bh, sam_hdr_line_itr_t *iter) {
1513+
if (!bh || !iter) return NULL;
1514+
1515+
if (iter->type == TYPEKEY("PG")) {
1516+
hts_log_warning("Removing PG lines is not supported!");
1517+
return NULL;
1518+
}
1519+
1520+
sam_hdr_line_itr_t *next = sam_hdr_line_itr_next(bh, iter);
1521+
char type[2] = { iter->type >> 8, iter->type & 0xff };
1522+
if (sam_hrecs_remove_line(bh->hrecs, type, iter) < 0) return NULL;
1523+
1524+
if (bh->hrecs->refs_changed >= 0 && rebuild_target_arrays(bh) != 0) return NULL;
1525+
if (bh->hrecs->dirty) redact_header_text(bh);
1526+
return next;
1527+
}
1528+
14841529
/*
14851530
* Check if sam_hdr_update_line() is being used to change the name of
14861531
* a record, and if the new name is going to clash with an existing one.
@@ -1920,6 +1965,19 @@ int sam_hdr_find_tag_pos(sam_hdr_t *bh,
19201965
return 0;
19211966
}
19221967

1968+
int sam_hdr_find_tag_iter(sam_hdr_line_itr_t *iter,
1969+
const char *key,
1970+
kstring_t *ks) {
1971+
if (!iter || !key) return -2;
1972+
1973+
sam_hrec_tag_t *tag = sam_hrecs_find_key(iter, key, NULL);
1974+
if (!tag || tag->len < 3) return -1;
1975+
1976+
ks_clear(ks);
1977+
if (kputsn(&tag->str[3], tag->len-3, ks) < 0) return -2;
1978+
return 0;
1979+
}
1980+
19231981
int sam_hdr_remove_tag_id(sam_hdr_t *bh,
19241982
const char *type,
19251983
const char *ID_key,
@@ -1946,6 +2004,16 @@ int sam_hdr_remove_tag_id(sam_hdr_t *bh,
19462004
return ret;
19472005
}
19482006

2007+
int sam_hdr_remove_tag_iter(sam_hdr_t *bh,
2008+
sam_hdr_line_itr_t *iter,
2009+
const char *key) {
2010+
if (!bh || !iter || !key) return -1;
2011+
2012+
int ret = sam_hrecs_remove_key(bh->hrecs, iter, key);
2013+
if (ret == 0 && bh->hrecs->dirty) redact_header_text(bh);
2014+
return ret;
2015+
}
2016+
19492017
/*
19502018
* Reconstructs a kstring from the header hash table.
19512019
* Returns 0 on success

htslib/sam.h

+74
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@ DEALINGS IN THE SOFTWARE. */
3737
extern "C" {
3838
#endif
3939

40+
struct sam_hrec_type_s;
41+
4042
/// Highest SAM format version supported by this library
4143
#define SAM_FORMAT_VERSION "1.6"
4244

@@ -456,6 +458,16 @@ const char *sam_hdr_str(sam_hdr_t *h);
456458
HTSLIB_EXPORT
457459
int sam_hdr_nref(const sam_hdr_t *h);
458460

461+
/* ==== Iterator methods ==== */
462+
463+
typedef struct sam_hrec_type_s sam_hdr_line_itr_t;
464+
465+
/// Get iterator pointing to the first header line
466+
sam_hdr_line_itr_t *sam_hdr_line_itr_first(sam_hdr_t *h);
467+
468+
/// Increment iterator to point to the next header line
469+
sam_hdr_line_itr_t *sam_hdr_line_itr_next(sam_hdr_t *h, sam_hdr_line_itr_t *iter);
470+
459471
/* ==== Line level methods ==== */
460472

461473
/// Add formatted lines to an existing header.
@@ -528,6 +540,36 @@ HTSLIB_EXPORT
528540
int sam_hdr_find_line_pos(sam_hdr_t *h, const char *type,
529541
int pos, kstring_t *ks);
530542

543+
/// Returns a complete line of formatted text for the line pointed to.
544+
/*!
545+
* @param iter Iterator pointing to a header line
546+
* @param ks kstring to hold the result
547+
* @return 0 on success;
548+
* -1 if @p iter does not point to a header line
549+
* -2 on other failures
550+
*
551+
* Puts a complete line of formatted text for a specific line into @p ks.
552+
* Appends the text to the existing content in @p ks, if any.
553+
*/
554+
HTSLIB_EXPORT
555+
int sam_hdr_find_line_iter_append(const sam_hdr_line_itr_t *iter, kstring_t *ks);
556+
557+
/// Returns a complete line of formatted text for the line pointed to.
558+
/*!
559+
* @param iter Iterator pointing to a header line
560+
* @param ks kstring to hold the result
561+
* @return 0 on success;
562+
* -1 if @p iter does not point to a header line
563+
* -2 on other failures
564+
*
565+
* Puts a complete line of formatted text for a specific line into @p ks.
566+
* Any existing content in @p ks will be overwritten.
567+
*/
568+
static inline int sam_hdr_find_line_iter(const sam_hdr_line_itr_t *iter, kstring_t *ks)
{
    /* Empty the destination first, then reuse the appending variant. */
    kstring_t *dest = ks_clear(ks);

    return sam_hdr_find_line_iter_append(iter, dest);
}
572+
531573
/// Remove a line with given type / id from a header
532574
/*!
533575
* @param type Type of the searched line. Eg. "SQ"
@@ -564,6 +606,14 @@ int sam_hdr_remove_line_id(sam_hdr_t *h, const char *type, const char *ID_key, c
564606
HTSLIB_EXPORT
565607
int sam_hdr_remove_line_pos(sam_hdr_t *h, const char *type, int position);
566608

609+
/// Remove line pointed to by iterator from a header
610+
/*!
611+
* @param h    Header containing the line
* @param iter Iterator pointing to a header line
612+
* @return An iterator pointing to the following line; NULL on error or if the removed line was the last one
613+
*/
614+
HTSLIB_EXPORT
615+
sam_hdr_line_itr_t *sam_hdr_remove_line_iter(sam_hdr_t *h, sam_hdr_line_itr_t *iter);
616+
567617
/// Add or update tag key,value pairs in a header line.
568618
/*!
569619
* @param type Type of the searched line. Eg. "SQ"
@@ -716,6 +766,21 @@ int sam_hdr_find_tag_id(sam_hdr_t *h, const char *type, const char *ID_key, cons
716766
HTSLIB_EXPORT
717767
int sam_hdr_find_tag_pos(sam_hdr_t *h, const char *type, int pos, const char *key, kstring_t *ks);
718768

769+
/// Return the value associated with a key for a header line identified by iterator
770+
/*!
771+
* @param iter Iterator pointing to a header line
772+
* @param key Key of the searched tag. Eg. "LN"
773+
* @param ks kstring where the value will be written
774+
* @return 0 on success
775+
* -1 if the requested tag does not exist
776+
* -2 on other errors
777+
*
778+
* Looks for a specific key in the SAM header line pointed to by @p iter and writes the
779+
* associated value into @p ks. Any pre-existing content in @p ks will be overwritten.
780+
*/
781+
HTSLIB_EXPORT
782+
int sam_hdr_find_tag_iter(sam_hdr_line_itr_t *iter, const char *key, kstring_t *ks);
783+
719784
/// Remove the key from the line identified by type, ID_key and ID_value.
720785
/*!
721786
* @param type Type of the line to which the tag belongs. Eg. "SQ"
@@ -727,6 +792,15 @@ int sam_hdr_find_tag_pos(sam_hdr_t *h, const char *type, int pos, const char *ke
727792
HTSLIB_EXPORT
728793
int sam_hdr_remove_tag_id(sam_hdr_t *h, const char *type, const char *ID_key, const char *ID_value, const char *key);
729794

795+
/// Remove the key from the line pointed to by the iterator.
796+
/*!
797+
* @param h    Header containing the line
* @param iter Iterator pointing to a header line
798+
* @param key Key of the targeted tag. Eg. "M5"
799+
* @return 1 if the key was removed; 0 if it was not present; -1 on error
800+
*/
801+
HTSLIB_EXPORT
802+
int sam_hdr_remove_tag_iter(sam_hdr_t *h, sam_hdr_line_itr_t *iter, const char *key);
803+
730804
/// Get the target id for a given reference sequence name
731805
/*!
732806
* @param ref Reference name

0 commit comments

Comments
 (0)