Skip to content

Commit

Permalink
Add hts_parse_decimal() and hts_parse_region() flags parameter [DRAFT]
Browse files Browse the repository at this point in the history
Add the first flag, HTS_PARSE_THOUSANDS_SEP.

[IN PROGRESS]  Need to figure out whether hts_parse_region() is workable
with a strend argument and the possibility of colons in chromosome names...
  • Loading branch information
jmarshall committed Aug 27, 2015
1 parent 306664a commit 4361a3d
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 29 deletions.
17 changes: 12 additions & 5 deletions hts.c
Original file line number Diff line number Diff line change
Expand Up @@ -1824,7 +1824,7 @@ static inline long long push_digit(long long i, char c)
return 10 * i + digit;
}

long long hts_parse_decimal(const char *str, char **end)
long long hts_parse_decimal(const char *str, char **strend, int flags)
{
long long n = 0;
int decimals = 0, e = 0, lost = 0;
Expand All @@ -1837,7 +1837,7 @@ long long hts_parse_decimal(const char *str, char **end)
if (*s == '+' || *s == '-') sign = *s++;
while (*s)
if (isdigit(*s)) n = push_digit(n, *s++);
else if (*s == ',') s++;
else if (*s == ',' && (flags & HTS_PARSE_THOUSANDS_SEP)) s++;
else break;

if (*s == '.') {
Expand All @@ -1860,7 +1860,7 @@ long long hts_parse_decimal(const char *str, char **end)
fprintf(stderr, "[W::%s] discarding fractional part of %.*s\n",
__func__, (int)(s - str), str);

if (end) *end = (char *) s;
if (strend) *strend = (char *) s;
else if (*s && hts_verbose >= 2)
fprintf(stderr, "[W::%s] ignoring unknown characters after %.*s[%s]\n",
__func__, (int)(s - str), str, s);
Expand All @@ -1869,6 +1869,12 @@ long long hts_parse_decimal(const char *str, char **end)
}

/* Thin wrapper that forwards to hts_parse_region(), passing NULL for its
   strend argument and HTS_PARSE_THOUSANDS_SEP for its flags argument.
   s, beg and end are passed through unchanged, and the value returned by
   hts_parse_region() is returned as-is. */
const char *hts_parse_reg(const char *s, int *beg, int *end)
{
return hts_parse_region(s, NULL, beg, end, HTS_PARSE_THOUSANDS_SEP);
}

const char *
hts_parse_region(const char *s, char **strend, int *beg, int *end, int flags)
{
char *hyphen;
const char *colon = strrchr(s, ':');
Expand All @@ -1877,11 +1883,12 @@ const char *hts_parse_reg(const char *s, int *beg, int *end)
return s + strlen(s);
}

*beg = hts_parse_decimal(colon+1, &hyphen) - 1;
*beg = hts_parse_decimal(colon+1, &hyphen, flags) - 1;
if (*beg < 0) *beg = 0;

// FIXME \0 vs. return NULL
if (*hyphen == '\0') *end = INT_MAX;
else if (*hyphen == '-') *end = hts_parse_decimal(hyphen+1, NULL);
else if (*hyphen == '-') *end = hts_parse_decimal(hyphen+1, strend, flags);
else return NULL;

if (*beg >= *end) return NULL;
Expand Down
39 changes: 26 additions & 13 deletions htslib/hts.h
Original file line number Diff line number Diff line change
Expand Up @@ -477,27 +477,40 @@ hts_idx_t *hts_idx_load2(const char *fn, const char *fnidx);
int hts_idx_get_stat(const hts_idx_t* idx, int tid, uint64_t* mapped, uint64_t* unmapped);
uint64_t hts_idx_get_n_no_coor(const hts_idx_t* idx);


#define HTS_PARSE_THOUSANDS_SEP 1 ///< Ignore ',' separators within numbers

/// Parse a numeric string
/** The number may be expressed in scientific notation, and may contain commas
in the integer part (before any decimal point or E notation).
@param str String to be parsed
@param end If non-NULL, set on return to point to the first character
in @a str after those forming the parsed number
/** The number may be expressed in scientific notation, and optionally may
contain commas in the integer part (before any decimal point or E notation).
@param str String to be parsed
@param strend If non-NULL, set on return to point to the first character
in @a str after those forming the parsed number
@param flags Or'ed-together combination of HTS_PARSE_* flags
@return Converted value of the parsed number.
When @a end is NULL, a warning will be printed (if hts_verbose is 2
When @a strend is NULL, a warning will be printed (if hts_verbose is 2
or more) if there are any trailing characters after the number.
*/
long long hts_parse_decimal(const char *str, char **end);
long long hts_parse_decimal(const char *str, char **strend, int flags);

/// Equivalent to hts_parse_region(str, NULL, beg, end, HTS_PARSE_THOUSANDS_SEP)
const char *hts_parse_reg(const char *str, int *beg, int *end);

/// Parse a "CHR:START-END"-style region string
/** @param str String to be parsed
@param beg Set on return to the 0-based start of the region
@param end Set on return to the 1-based end of the region
@return Pointer to the colon or '\0' after the reference sequence name,
or NULL if @a str could not be parsed.
/** @param str String to be parsed
@param strend If non-NULL, set on return to point to the first character
in @a str after those forming the parsed region
@param beg Set on return to the 0-based start of the region
@param end Set on return to the 1-based end of the region
@param flags Or'ed-together combination of HTS_PARSE_* flags
@return Pointer to the colon or terminating character after the reference
sequence name, or NULL if @a str could not be parsed.
When @a strend is NULL, a warning will be printed (if hts_verbose is 2
or more) if there are any trailing characters after the region string.
*/
const char *hts_parse_reg(const char *str, int *beg, int *end);
const char *hts_parse_region(const char *str, char **strend, int *beg, int *end, int flags);

hts_itr_t *hts_itr_query(const hts_idx_t *idx, int tid, int beg, int end, hts_readrec_func *readrec);
void hts_itr_destroy(hts_itr_t *iter);
Expand Down
8 changes: 4 additions & 4 deletions regidx.c
Original file line number Diff line number Diff line change
Expand Up @@ -297,11 +297,11 @@ int regidx_parse_bed(const char *line, char **chr_beg, char **chr_end, reg_t *re
*chr_end = se-1;

ss = se+1;
reg->start = hts_parse_decimal(ss, &se);
reg->start = hts_parse_decimal(ss, &se, 0);
if ( ss==se ) { fprintf(stderr,"Could not parse bed line: %s\n", line); return -2; }

ss = se+1;
reg->end = hts_parse_decimal(ss, &se) - 1;
reg->end = hts_parse_decimal(ss, &se, 0) - 1;
if ( ss==se ) { fprintf(stderr,"Could not parse bed line: %s\n", line); return -2; }

return 0;
Expand All @@ -322,15 +322,15 @@ int regidx_parse_tab(const char *line, char **chr_beg, char **chr_end, reg_t *re
*chr_end = se-1;

ss = se+1;
reg->start = hts_parse_decimal(ss, &se) - 1;
reg->start = hts_parse_decimal(ss, &se, 0) - 1;
if ( ss==se ) { fprintf(stderr,"Could not parse bed line: %s\n", line); return -2; }

if ( !se[0] || !se[1] )
reg->end = reg->start;
else
{
ss = se+1;
reg->end = hts_parse_decimal(ss, &se);
reg->end = hts_parse_decimal(ss, &se, 0);
if ( ss==se ) reg->end = reg->start;
else reg->end--;
}
Expand Down
14 changes: 7 additions & 7 deletions synced_bcf_reader.c
Original file line number Diff line number Diff line change
Expand Up @@ -887,7 +887,7 @@ static bcf_sr_regions_t *_regions_init_string(const char *str)
if ( *ep==':' )
{
sp = ep+1;
from = hts_parse_decimal(sp,(char**)&ep);
from = hts_parse_decimal(sp,(char**)&ep,0);
if ( sp==ep )
{
fprintf(stderr,"[%s:%d %s] Could not parse the region(s): %s\n", __FILE__,__LINE__,__FUNCTION__,str);
Expand All @@ -906,7 +906,7 @@ static bcf_sr_regions_t *_regions_init_string(const char *str)
}
ep++;
sp = ep;
to = hts_parse_decimal(sp,(char**)&ep);
to = hts_parse_decimal(sp,(char**)&ep,0);
if ( *ep && *ep!=',' )
{
fprintf(stderr,"[%s:%d %s] Could not parse the region(s): %s\n", __FILE__,__LINE__,__FUNCTION__,str);
Expand Down Expand Up @@ -953,15 +953,15 @@ static int _regions_parse_line(char *line, int ichr,int ifrom,int ito, char **ch
if ( i<=k ) return -1;
if ( k==l )
{
*from = *to = hts_parse_decimal(ss, &tmp);
*from = *to = hts_parse_decimal(ss, &tmp, 0);
if ( tmp==ss ) return -1;
}
else
{
if ( k==ifrom )
*from = hts_parse_decimal(ss, &tmp);
*from = hts_parse_decimal(ss, &tmp, 0);
else
*to = hts_parse_decimal(ss, &tmp);
*to = hts_parse_decimal(ss, &tmp, 0);
if ( ss==tmp ) return -1;

for (i=k; i<l && *se; i++)
Expand All @@ -971,9 +971,9 @@ static int _regions_parse_line(char *line, int ichr,int ifrom,int ito, char **ch
}
if ( i<l ) return -1;
if ( k==ifrom )
*to = hts_parse_decimal(ss, &tmp);
*to = hts_parse_decimal(ss, &tmp, 0);
else
*from = hts_parse_decimal(ss, &tmp);
*from = hts_parse_decimal(ss, &tmp, 0);
if ( ss==tmp ) return -1;
}

Expand Down

0 comments on commit 4361a3d

Please sign in to comment.