Skip to content

Commit 88586e5

Browse files
committed
Add sam_hdr_passes_filter() for SAM header filter expressions
1 parent 420e763 commit 88586e5

File tree

3 files changed

+105
-4
lines changed

3 files changed

+105
-4
lines changed

Makefile

+1-1
Original file line numberDiff line numberDiff line change
@@ -459,7 +459,7 @@ hts-object-files: $(LIBHTS_OBJS)
459459
bgzf.o bgzf.pico: bgzf.c config.h $(htslib_hts_h) $(htslib_bgzf_h) $(htslib_hfile_h) $(htslib_thread_pool_h) $(htslib_hts_endian_h) cram/pooled_alloc.h $(hts_internal_h) $(htslib_khash_h)
460460
errmod.o errmod.pico: errmod.c config.h $(htslib_hts_h) $(htslib_ksort_h) $(htslib_hts_os_h)
461461
kstring.o kstring.pico: kstring.c config.h $(htslib_kstring_h)
462-
header.o header.pico: header.c config.h $(textutils_internal_h) $(header_h)
462+
header.o header.pico: header.c config.h $(htslib_hts_expr_h) $(textutils_internal_h) $(header_h)
463463
hfile.o hfile.pico: hfile.c config.h $(htslib_hfile_h) $(hfile_internal_h) $(htslib_kstring_h) $(hts_internal_h) $(htslib_khash_h)
464464
hfile_gcs.o hfile_gcs.pico: hfile_gcs.c config.h $(htslib_hts_h) $(htslib_kstring_h) $(hfile_internal_h)
465465
hfile_libcurl.o hfile_libcurl.pico: hfile_libcurl.c config.h $(hfile_internal_h) $(htslib_hts_h) $(htslib_kstring_h) $(htslib_khash_h)

header.c

+94
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3434
#include <string.h>
3535
#include <assert.h>
3636
#include <errno.h>
37+
#include "htslib/hts_expr.h"
3738
#include "textutils_internal.h"
3839
#include "header.h"
3940

@@ -898,6 +899,84 @@ static int sam_hrecs_parse_lines(sam_hrecs_t *hrecs, const char *hdr, size_t len
898899
return 0;
899900
}
900901

902+
static int sam_hrecs_tag_lookup(sam_hrec_type_t *h_type, const char *tagname,
903+
char type, hts_expr_val_t *result) {
904+
sam_hrec_tag_t *tag = sam_hrecs_find_key(h_type, tagname, NULL);
905+
if (tag == NULL || tag->len < 3) {
906+
result->is_str = (type == 'Z');
907+
result->s.l = 0;
908+
result->d = 0.0;
909+
result->is_true = 0;
910+
return 0;
911+
}
912+
913+
result->is_true = 1;
914+
switch (type) {
915+
case 'f':
916+
result->is_str = 0;
917+
result->d = strtod(&tag->str[3], NULL);
918+
return 0;
919+
920+
case 'i':
921+
result->is_str = 0;
922+
result->d = strtoll(&tag->str[3], NULL, 0);
923+
return 0;
924+
925+
case 'Z':
926+
result->is_str = 1;
927+
ks_clear(&result->s);
928+
return (kputsn(&tag->str[3], tag->len-3, &result->s) >= 0)? 0 : -1;
929+
}
930+
931+
return -1;
932+
}
933+
934+
static int sam_hrecs_sym_lookup(void *data, char *str, char **end,
935+
hts_expr_val_t *result) {
936+
sam_hrec_type_t *h_type = (sam_hrec_type_t *) data;
937+
938+
switch (*str) {
939+
case 't':
940+
if (memcmp(str, "type", 4) == 0) {
941+
*end = &str[4];
942+
result->is_str = 1;
943+
ks_clear(&result->s);
944+
kputc_(h_type->type >> 8, &result->s);
945+
kputc(h_type->type & 0xff, &result->s);
946+
return (ks_len(&result->s) == 2)? 0 : -1;
947+
}
948+
break;
949+
950+
case '@':
951+
if (isalpha_c(str[1]) && isalpha_c(str[2])) {
952+
*end = &str[3];
953+
result->is_str = 0;
954+
result->is_true = result->d = (h_type->type == TYPEKEY(&str[1]));
955+
return 0;
956+
}
957+
break;
958+
959+
case '[':
960+
if (str[1] & str[2]) {
961+
if (str[3] == ']') {
962+
*end = &str[4];
963+
return sam_hrecs_tag_lookup(h_type, &str[1], 'Z', result);
964+
}
965+
else if (str[3] == ':' && str[4] && str[5] == ']') {
966+
if (!strchr("fiZ", str[4])) {
967+
hts_log_error("Unrecognised %.6s type code", str);
968+
return -1;
969+
}
970+
*end = &str[6];
971+
return sam_hrecs_tag_lookup(h_type, &str[1], str[4], result);
972+
}
973+
}
974+
break;
975+
}
976+
977+
return -1;
978+
}
979+
901980
/*! Update sam_hdr_t target_name and target_len arrays
902981
*
903982
* @return 0 on success; -1 on failure
@@ -1808,6 +1887,21 @@ int sam_hdr_remove_lines(sam_hdr_t *bh, const char *type, const char *id, void *
18081887
return ret;
18091888
}
18101889

1890+
int sam_hdr_passes_filter(sam_hdr_t *bh, sam_hdr_line_t *line, hts_filter_t *filt) {
1891+
hts_expr_val_t result = HTS_EXPR_VAL_INIT;
1892+
int ret;
1893+
if (hts_filter_eval2(filt, line, sam_hrecs_sym_lookup, &result) >= 0) {
1894+
ret = result.is_true;
1895+
}
1896+
else {
1897+
hts_log_error("Couldn't process sam_hdr filter expression");
1898+
ret = -1;
1899+
}
1900+
1901+
hts_expr_val_free(&result);
1902+
return ret;
1903+
}
1904+
18111905
int sam_hdr_count_lines(sam_hdr_t *bh, const char *type) {
18121906
int count;
18131907
sam_hrec_type_t *first_ty, *itr_ty;

htslib/sam.h

+10-3
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ DEALINGS IN THE SOFTWARE. */
4444
extern "C" {
4545
#endif
4646

47+
struct hts_filter_t;
4748
struct sam_hrec_type_s;
4849

4950
/// Highest SAM format version supported by this library
@@ -859,6 +860,15 @@ hts_pos_t sam_hdr_tid2len(const sam_hdr_t *h, int tid);
859860
*/
860861
static inline int bam_name2id(sam_hdr_t *h, const char *ref) { return sam_hdr_name2tid(h, ref); }
861862

863+
/// Check whether a header line passes an hts_filter
864+
/** @param h Pointer to the header structure previously read
865+
* @param line Iterator pointing to a header line
866+
* @param filt Pointer to the filter, created from hts_filter_init
867+
* @return 1 if it passes, 0 if not, and <0 on error
868+
*/
869+
HTSLIB_EXPORT
870+
int sam_hdr_passes_filter(sam_hdr_t *h, sam_hdr_line_t *line, struct hts_filter_t *filt);
871+
862872
/// Generate a unique \@PG ID: value
863873
/*!
864874
* @param name Name of the program. Eg. samtools
@@ -1505,9 +1515,6 @@ const char *sam_parse_region(sam_hdr_t *h, const char *s, int *tid,
15051515
HTSLIB_EXPORT
15061516
int sam_write1(samFile *fp, const sam_hdr_t *h, const bam1_t *b) HTS_RESULT_USED;
15071517

1508-
// Forward declaration, see hts_expr.h for full.
1509-
struct hts_filter_t;
1510-
15111518
/// sam_passes_filter - Checks whether a record passes an hts_filter.
15121519
/** @param h Pointer to the header structure previously read
15131520
* @param b Pointer to the BAM record to be checked

0 commit comments

Comments
 (0)