Skip to content

Commit 5af7995

Browse files
committed
Add sam_hdr_passes_filter() for SAM header filter expressions
1 parent c4f2512 commit 5af7995

File tree

3 files changed

+105
-4
lines changed

3 files changed

+105
-4
lines changed

Makefile

+1-1
Original file line numberDiff line numberDiff line change
@@ -359,7 +359,7 @@ hts-object-files: $(LIBHTS_OBJS)
359359
bgzf.o bgzf.pico: bgzf.c config.h $(htslib_hts_h) $(htslib_bgzf_h) $(htslib_hfile_h) $(htslib_thread_pool_h) $(htslib_hts_endian_h) cram/pooled_alloc.h $(hts_internal_h) $(htslib_khash_h)
360360
errmod.o errmod.pico: errmod.c config.h $(htslib_hts_h) $(htslib_ksort_h) $(htslib_hts_os_h)
361361
kstring.o kstring.pico: kstring.c config.h $(htslib_kstring_h)
362-
header.o header.pico: header.c config.h $(textutils_internal_h) $(header_h)
362+
header.o header.pico: header.c config.h $(htslib_hts_expr_h) $(textutils_internal_h) $(header_h)
363363
hfile.o hfile.pico: hfile.c config.h $(htslib_hfile_h) $(hfile_internal_h) $(htslib_kstring_h) $(hts_internal_h) $(htslib_khash_h)
364364
hfile_gcs.o hfile_gcs.pico: hfile_gcs.c config.h $(htslib_hts_h) $(htslib_kstring_h) $(hfile_internal_h)
365365
hfile_libcurl.o hfile_libcurl.pico: hfile_libcurl.c config.h $(hfile_internal_h) $(htslib_hts_h) $(htslib_kstring_h) $(htslib_khash_h)

header.c

+94
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3434
#include <string.h>
3535
#include <assert.h>
3636
#include <errno.h>
37+
#include "htslib/hts_expr.h"
3738
#include "textutils_internal.h"
3839
#include "header.h"
3940

@@ -898,6 +899,84 @@ static int sam_hrecs_parse_lines(sam_hrecs_t *hrecs, const char *hdr, size_t len
898899
return 0;
899900
}
900901

902+
static int sam_hrecs_tag_lookup(sam_hrec_type_t *h_type, const char *tagname,
903+
char type, hts_expr_val_t *result) {
904+
sam_hrec_tag_t *tag = sam_hrecs_find_key(h_type, tagname, NULL);
905+
if (tag == NULL || tag->len < 3) {
906+
result->is_str = (type == 'Z');
907+
result->s.l = 0;
908+
result->d = 0.0;
909+
result->is_true = 0;
910+
return 0;
911+
}
912+
913+
result->is_true = 1;
914+
switch (type) {
915+
case 'f':
916+
result->is_str = 0;
917+
result->d = strtod(&tag->str[3], NULL);
918+
return 0;
919+
920+
case 'i':
921+
result->is_str = 0;
922+
result->d = strtoll(&tag->str[3], NULL, 0);
923+
return 0;
924+
925+
case 'Z':
926+
result->is_str = 1;
927+
ks_clear(&result->s);
928+
return (kputsn(&tag->str[3], tag->len-3, &result->s) >= 0)? 0 : -1;
929+
}
930+
931+
return -1;
932+
}
933+
934+
static int sam_hrecs_sym_lookup(void *data, char *str, char **end,
935+
hts_expr_val_t *result) {
936+
sam_hrec_type_t *h_type = (sam_hrec_type_t *) data;
937+
938+
switch (*str) {
939+
case 't':
940+
if (memcmp(str, "type", 4) == 0) {
941+
*end = &str[4];
942+
result->is_str = 1;
943+
ks_clear(&result->s);
944+
kputc_(h_type->type >> 8, &result->s);
945+
kputc(h_type->type & 0xff, &result->s);
946+
return (ks_len(&result->s) == 2)? 0 : -1;
947+
}
948+
break;
949+
950+
case '@':
951+
if (isalpha_c(str[1]) && isalpha_c(str[2])) {
952+
*end = &str[3];
953+
result->is_str = 0;
954+
result->is_true = result->d = (h_type->type == TYPEKEY(&str[1]));
955+
return 0;
956+
}
957+
break;
958+
959+
case '[':
960+
if (str[1] & str[2]) {
961+
if (str[3] == ']') {
962+
*end = &str[4];
963+
return sam_hrecs_tag_lookup(h_type, &str[1], 'Z', result);
964+
}
965+
else if (str[3] == ':' && str[4] && str[5] == ']') {
966+
if (!strchr("fiZ", str[4])) {
967+
hts_log_error("Unrecognised %.6s type code", str);
968+
return -1;
969+
}
970+
*end = &str[6];
971+
return sam_hrecs_tag_lookup(h_type, &str[1], str[4], result);
972+
}
973+
}
974+
break;
975+
}
976+
977+
return -1;
978+
}
979+
901980
/*! Update sam_hdr_t target_name and target_len arrays
902981
*
903982
* @return 0 on success; -1 on failure
@@ -1762,6 +1841,21 @@ int sam_hdr_remove_lines(sam_hdr_t *bh, const char *type, const char *id, void *
17621841
return ret;
17631842
}
17641843

1844+
int sam_hdr_passes_filter(sam_hdr_t *bh, sam_hdr_line_itr_t *iter, hts_filter_t *filt) {
1845+
hts_expr_val_t result;
1846+
int ret;
1847+
if (hts_filter_eval(filt, iter, sam_hrecs_sym_lookup, &result) >= 0) {
1848+
ret = result.is_true;
1849+
}
1850+
else {
1851+
hts_log_error("Couldn't process sam_hdr filter expression");
1852+
ret = -1;
1853+
}
1854+
1855+
hts_expr_val_free(&result);
1856+
return ret;
1857+
}
1858+
17651859
int sam_hdr_count_lines(sam_hdr_t *bh, const char *type) {
17661860
int count;
17671861
sam_hrec_type_t *first_ty, *itr_ty;

htslib/sam.h

+10-3
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ DEALINGS IN THE SOFTWARE. */
3737
extern "C" {
3838
#endif
3939

40+
struct hts_filter_t;
4041
struct sam_hrec_type_s;
4142

4243
/// Highest SAM format version supported by this library
@@ -847,6 +848,15 @@ hts_pos_t sam_hdr_tid2len(const sam_hdr_t *h, int tid);
847848
*/
848849
static inline int bam_name2id(sam_hdr_t *h, const char *ref) { return sam_hdr_name2tid(h, ref); }
849850

851+
/// Check whether a header line passes an hts_filter
852+
/** @param h Pointer to the header structure previously read
853+
* @param iter Iterator pointing to a header line
854+
* @param filt Pointer to the filter, created from hts_filter_init
855+
* @return 1 if it passes, 0 if not, and <0 on error
856+
*/
857+
HTSLIB_EXPORT
858+
int sam_hdr_passes_filter(sam_hdr_t *h, sam_hdr_line_itr_t *iter, struct hts_filter_t *filt);
859+
850860
/// Generate a unique \@PG ID: value
851861
/*!
852862
* @param name Name of the program. Eg. samtools
@@ -1485,9 +1495,6 @@ const char *sam_parse_region(sam_hdr_t *h, const char *s, int *tid,
14851495
HTSLIB_EXPORT
14861496
int sam_write1(samFile *fp, const sam_hdr_t *h, const bam1_t *b) HTS_RESULT_USED;
14871497

1488-
// Forward declaration, see hts_expr.h for full.
1489-
struct hts_filter_t;
1490-
14911498
/// sam_passes_filter - Checks whether a record passes an hts_filter.
14921499
/** @param h Pointer to the header structure previously read
14931500
* @param b Pointer to the BAM record to be checked

0 commit comments

Comments
 (0)