Skip to content

Commit 7453e99

Browse files
committed
Add bam_aux_first()/bam_aux_next() tagged aux field iterator API
Add new API functions for iterating through a BAM record's aux fields, inline accessor methods for field tag and type (or code can continue to use s-2 and *s), and a variant of bam_aux_del() that returns the (updated) iterator to the following field (for use in iterator-based loops that delete fields).
1 parent 9045785 commit 7453e99

File tree

2 files changed

+113
-14
lines changed

2 files changed

+113
-14
lines changed

htslib/sam.h

+49-1
Original file line numberDiff line numberDiff line change
@@ -1428,7 +1428,6 @@ int sam_passes_filter(const sam_hdr_t *h, const bam1_t *b,
14281428

14291429
/// Converts a BAM aux tag to SAM format
14301430
/*
1431-
* @param b Pointer to the bam record
14321431
* @param key Two letter tag key
14331432
* @param type Single letter type code: ACcSsIifHZB.
14341433
* @param tag Tag data pointer, in BAM format
@@ -1616,6 +1615,29 @@ static inline const uint8_t *sam_format_aux1(const uint8_t *key,
16161615
return NULL;
16171616
}
16181617

1618+
/// Return a pointer to a BAM record's first aux field
1619+
/** @param b Pointer to the BAM record
1620+
@return Aux field pointer, or NULL if the record has none
1621+
1622+
When NULL is returned, errno will also be set to ENOENT. ("Aux field pointers"
1623+
point to the TYPE byte within the auxiliary data for that field; but in general
1624+
it is unnecessary for user code to be aware of this.)
1625+
*/
1626+
HTSLIB_EXPORT
1627+
uint8_t *bam_aux_first(const bam1_t *b);
1628+
1629+
/// Return a pointer to a BAM record's next aux field
1630+
/** @param b Pointer to the BAM record
1631+
@param s Aux field pointer, as returned by bam_aux_first()/_next()/_get()
1632+
@return Pointer to the next aux field, or NULL if already last or error
1633+
1634+
Whenever NULL is returned, errno will also be set: ENOENT if @p s was the
1635+
record's last aux field; otherwise EINVAL, indicating that the BAM record's
1636+
aux data is corrupt.
1637+
*/
1638+
HTSLIB_EXPORT
1639+
uint8_t *bam_aux_next(const bam1_t *b, const uint8_t *s);
1640+
16191641
/// Return a pointer to an aux record
16201642
/** @param b Pointer to the bam record
16211643
@param tag Desired aux tag
@@ -1628,6 +1650,19 @@ static inline const uint8_t *sam_format_aux1(const uint8_t *key,
16281650
HTSLIB_EXPORT
16291651
uint8_t *bam_aux_get(const bam1_t *b, const char tag[2]);
16301652

1653+
/// Return the aux field's 2-character tag
1654+
/** @param s Aux field pointer, as returned by bam_aux_first()/_next()/_get()
1655+
@return Pointer to the tag characters, NOT NUL-terminated
1656+
*/
1657+
static inline
const char *bam_aux_tag(const uint8_t *s)
{
    /* An aux field pointer addresses the type byte; the two tag
       characters are stored in the two bytes immediately before it.
       The returned characters are NOT NUL-terminated. */
    const uint8_t *tag_start = s - 2;
    return (const char *) tag_start;
}
1659+
1660+
/// Return the aux field's type character
1661+
/** @param s Aux field pointer, as returned by bam_aux_first()/_next()/_get()
1662+
@return The type character: one of cCsSiI/fd/A/Z/H/B
1663+
*/
1664+
static inline char bam_aux_type(const uint8_t *s)
{
    /* The aux field pointer addresses the type byte itself */
    return (char) s[0];
}
1665+
16311666
/// Return a SAM formatting string containing a BAM tag
16321667
/** @param b Pointer to the bam record
16331668
@param tag Desired aux tag
@@ -1739,6 +1774,19 @@ int bam_aux_append(bam1_t *b, const char tag[2], char type, int len, const uint8
17391774
HTSLIB_EXPORT
17401775
int bam_aux_del(bam1_t *b, uint8_t *s);
17411776

1777+
/// Delete tag data from a bam record
1778+
/** @param b The bam record to update
1779+
@param s Pointer to the aux field to delete, as returned by
1780+
bam_aux_first()/_next()/_get()
1781+
@return Pointer to the following aux field, or NULL if none or on error
1782+
1783+
Whenever NULL is returned, errno will also be set: ENOENT if the aux field
1784+
deleted was the record's last one; otherwise EINVAL, indicating that the
1785+
BAM record's aux data is corrupt.
1786+
*/
1787+
HTSLIB_EXPORT
1788+
uint8_t *bam_aux_erase(bam1_t *b, uint8_t *s);
1789+
17421790
/// Update or add a string-type tag
17431791
/* @param b The bam record to update
17441792
@param tag Tag identifier

sam.c

+64-13
Original file line numberDiff line numberDiff line change
@@ -4508,7 +4508,52 @@ static inline uint8_t *skip_aux(uint8_t *s, uint8_t *end)
45084508
}
45094509
}
45104510

4511+
uint8_t *bam_aux_first(const bam1_t *b)
4512+
{
4513+
uint8_t *s = bam_get_aux(b);
4514+
uint8_t *end = b->data + b->l_data;
4515+
if (s >= end) { errno = ENOENT; return NULL; }
4516+
return s+2;
4517+
}
4518+
4519+
uint8_t *bam_aux_next(const bam1_t *b, const uint8_t *s)
4520+
{
4521+
uint8_t *end = b->data + b->l_data;
4522+
uint8_t *next = s? skip_aux((uint8_t *) s, end) : end;
4523+
if (next == NULL) goto bad_aux;
4524+
if (next >= end) { errno = ENOENT; return NULL; }
4525+
return next+2;
4526+
4527+
bad_aux:
4528+
hts_log_error("Corrupted aux data for read %s", bam_get_qname(b));
4529+
errno = EINVAL;
4530+
return NULL;
4531+
}
4532+
45114533
uint8_t *bam_aux_get(const bam1_t *b, const char tag[2])
4534+
{
4535+
uint8_t *s;
4536+
for (s = bam_aux_first(b); s; s = bam_aux_next(b, s))
4537+
if (s[-2] == tag[0] && s[-1] == tag[1]) {
4538+
// Check the tag value is valid and complete
4539+
uint8_t *e = skip_aux(s, b->data + b->l_data);
4540+
if (e == NULL) goto bad_aux;
4541+
if ((*s == 'Z' || *s == 'H') && *(e - 1) != '\0') goto bad_aux;
4542+
4543+
return s;
4544+
}
4545+
4546+
// errno now as set by bam_aux_first()/bam_aux_next()
4547+
return NULL;
4548+
4549+
bad_aux:
4550+
hts_log_error("Corrupted aux data for read %s", bam_get_qname(b));
4551+
errno = EINVAL;
4552+
return NULL;
4553+
}
4554+
4555+
#if 0
4556+
uint8_t *bam_aux_get_old(const bam1_t *b, const char tag[2])
45124557
{
45134558
uint8_t *s, *end, *t = (uint8_t *) tag;
45144559
uint16_t y = (uint16_t) t[0]<<8 | t[1];
@@ -4540,24 +4585,30 @@ uint8_t *bam_aux_get(const bam1_t *b, const char tag[2])
45404585
errno = EINVAL;
45414586
return NULL;
45424587
}
4588+
#endif
45434589

4544-
// s MUST BE returned by bam_aux_get()
4545-
int bam_aux_del(bam1_t *b, uint8_t *s)
4590+
uint8_t *bam_aux_erase(bam1_t *b, uint8_t *s)
45464591
{
4547-
uint8_t *p, *aux;
4548-
int l_aux = bam_get_l_aux(b);
4549-
aux = bam_get_aux(b);
4550-
p = s - 2;
4551-
s = skip_aux(s, aux + l_aux);
4552-
if (s == NULL) goto bad_aux;
4553-
memmove(p, s, l_aux - (s - aux));
4554-
b->l_data -= s - p;
4555-
return 0;
4592+
uint8_t *end = b->data + b->l_data;
4593+
uint8_t *next = skip_aux(s, end);
4594+
if (next == NULL) goto bad_aux;
45564595

4557-
bad_aux:
4596+
memmove(s-2, next, end - next);
4597+
b->l_data -= next - (s-2);
4598+
4599+
if (next >= end) { errno = ENOENT; return NULL; }
4600+
return s;
4601+
4602+
bad_aux:
45584603
hts_log_error("Corrupted aux data for read %s", bam_get_qname(b));
45594604
errno = EINVAL;
4560-
return -1;
4605+
return NULL;
4606+
}
4607+
4608+
/* Delete the aux field at s from the record.
 *
 * Thin wrapper around bam_aux_erase() that keeps the integer return
 * convention: 0 on success, -1 on failure.
 */
int bam_aux_del(bam1_t *b, uint8_t *s)
{
    // A non-NULL result means the field was removed and another follows it
    if (bam_aux_erase(b, s) != NULL)
        return 0;

    // NULL with ENOENT only means the deleted field was the last one,
    // which is still a successful deletion; anything else is an error
    if (errno == ENOENT)
        return 0;

    return -1;
}
45624613

45634614
int bam_aux_update_str(bam1_t *b, const char tag[2], int len, const char *data)

0 commit comments

Comments
 (0)