Skip to content

Commit

Permalink
send single hits to HMMER
Browse files Browse the repository at this point in the history
  • Loading branch information
horta committed Feb 26, 2024
1 parent 050e8fe commit fd36079
Show file tree
Hide file tree
Showing 17 changed files with 295 additions and 276 deletions.
2 changes: 1 addition & 1 deletion c-core/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
cmake_minimum_required(VERSION 3.20.2 FATAL_ERROR)
project(deciphon VERSION 0.19.2 LANGUAGES C)
project(deciphon VERSION 0.20.0 LANGUAGES C)

include(cmake/warnings.cmake)
include(cmake/sanitizers.cmake)
Expand Down
19 changes: 0 additions & 19 deletions c-core/infer_amino.c

This file was deleted.

10 changes: 0 additions & 10 deletions c-core/infer_amino.h

This file was deleted.

92 changes: 72 additions & 20 deletions c-core/match.c
Original file line number Diff line number Diff line change
@@ -1,48 +1,100 @@
#include "match.h"
#include "imm/gencode.h"
#include "imm/nuclt_code.h"
#include "imm/path.h"
#include "protein.h"
#include "state.h"

/* Builds the starting match for a path/sequence/protein triple.
 * NOTE(review): this span is a rendered diff without +/- markers; the
 * match_init signature and the first return look like the removed lines,
 * match_begin and the designated-initializer return like the added ones. */
struct match match_init(struct protein const *protein)
struct match match_begin(struct imm_path const *path,
struct imm_seq const *sequence,
struct protein const *protein)
{
return (struct match){protein, {}, {}, {}};
/* The two trailing positional zeros initialize the members declared right
 * after `protein` (presumably step and sequence_position; confirm against
 * struct match). */
return (struct match){
.path = path, .sequence = sequence, .protein = protein, 0, 0};
}

int match_setup(struct match *x, struct imm_step step, struct imm_seq seq)
/* Returns the sentinel match that match_next() hands back once iteration is
 * over; compare against it with match_equal().
 * NOTE(review): rendered diff without +/- markers; the two `x->` assignments
 * appear to be removed lines left over from match_setup. */
struct match match_end(void)
{
x->step = step;
x->seq = seq;
/* All pointers NULL; the two members following `protein` are set to -1. */
return (struct match){.path = NULL, .sequence = NULL, .protein = NULL, -1, -1};
}

if (!state_is_mute(step.state_id))
{
x->codon = imm_codon_any(x->protein->params.code->nuclt);
return protein_decode(x->protein, &seq, step.state_id, &x->codon);
}
return 0;
/* Member-wise comparison of two matches. The path, sequence and protein
 * pointers are compared by address, not by what they point to. */
bool match_equal(struct match a, struct match b)
{
    if (a.path != b.path) return false;
    if (a.sequence != b.sequence) return false;
    if (a.protein != b.protein) return false;
    if (a.step != b.step) return false;
    return a.sequence_position == b.sequence_position;
}

struct match match_next(struct match const *x)
{
if (match_equal(*x, match_end())) return match_end();
if (x->step + 1 == imm_path_nsteps(x->path)) return match_end();

int pos = x->sequence_position + imm_path_step(x->path, x->step)->seqsize;
int step = x->step + 1;
return (struct match){.path = x->path,
.sequence = x->sequence,
.protein = x->protein,
step,
pos};
}

/* Passes the match's state id and `dst` to state_name() and forwards its
 * return value.
 * NOTE(review): rendered diff without +/- markers; the first return (reading
 * x->step.state_id) looks like the removed line. */
int match_state_name(struct match const *x, char *dst)
{
return state_name(x->step.state_id, dst);
return state_name(match_state_id(x), dst);
}

/* Reports state_is_mute() for the match's state id.
 * NOTE(review): "mutet" looks like a typo for "mute" (the line directly above
 * it carries the earlier spelling); a rename has to cover match.h and every
 * caller at the same time.
 * NOTE(review): rendered diff without +/- markers; the first signature and
 * the first return look like the removed lines. */
bool match_state_is_mute(struct match const *x)
bool match_state_is_mutet(struct match const *x)
{
return state_is_mute(x->step.state_id);
return state_is_mute(match_state_id(x));
}

/* True when the match's state id satisfies state_is_match(),
 * state_is_insert() or state_is_delete().
 * NOTE(review): rendered diff without +/- markers; the first return statement
 * (reading x->step.state_id) looks like the removed version. */
bool match_state_is_core(struct match const *x)
{
return state_is_match(x->step.state_id) ||
state_is_insert(x->step.state_id) || state_is_delete(x->step.state_id);
/* Look the id up once and reuse it for all three predicates. */
int state_id = match_state_id(x);
return state_is_match(state_id) || state_is_insert(state_id) ||
state_is_delete(state_id);
}

int match_state_state_id(struct match const *x) { return x->step.state_id; }
int match_state_id(struct match const *x)
{
return imm_path_step(x->path, x->step)->state_id;
}

char match_amino(struct match const *x)
/* Decodes the current step into an amino acid character.
 *
 * On success stores the result of imm_gencode_decode() in *amino and returns
 * 0. If protein_decode() returns non-zero, that code is returned and *amino is
 * not written.
 * NOTE(review): rendered diff without +/- markers; the first return (reading
 * x->codon) looks like the removed one-line body.
 * NOTE(review): the slice built here repeats match_subsequence() and the
 * decode repeats match_codon(); candidates for reuse. */
int match_amino(struct match const *x, char *amino)
{
return imm_gencode_decode(x->protein->params.gencode, x->codon);
struct imm_codon codon = imm_codon_any(x->protein->params.code->nuclt);
int state_id = match_state_id(x);

/* Slice of the sequence covered by the current step:
 * [sequence_position, sequence_position + seqsize). */
struct imm_step const *step = imm_path_step(x->path, x->step);
int pos = x->sequence_position;
struct imm_range range = imm_range(pos, pos + step->seqsize);
struct imm_seq seq = imm_seq_slice(x->sequence, range);

int rc = protein_decode(x->protein, &seq, state_id, &codon);
if (rc) return rc;

*amino = imm_gencode_decode(x->protein->params.gencode, codon);
return 0;
}

int match_codon(struct match const *x, struct imm_codon *codon)
{
*codon = imm_codon_any(x->protein->params.code->nuclt);
int state_id = match_state_id(x);
struct imm_seq seq = match_subsequence(x);
return protein_decode(x->protein, &seq, state_id, codon);
}

struct imm_codon match_codon(struct match const *x) { return x->codon; }
/* Returns the slice of the sequence covered by the current step, i.e. the
 * range [sequence_position, sequence_position + seqsize). */
struct imm_seq match_subsequence(struct match const *x)
{
    int start = x->sequence_position;
    int stop = start + imm_path_step(x->path, x->step)->seqsize;
    return imm_seq_slice(x->sequence, imm_range(start, stop));
}

/* Gives access to the path step the match currently points at. */
struct imm_step const *match_step(struct match const *x)
{
    struct imm_step const *current = imm_path_step(x->path, x->step);
    return current;
}
36 changes: 21 additions & 15 deletions c-core/match.h
Original file line number Diff line number Diff line change
@@ -1,29 +1,35 @@
#ifndef MATCH_H
#define MATCH_H

#include "imm/codon.h"
#include "imm/step.h"
#include <stdbool.h>
#include "imm/step.h"

struct imm_codon;
struct imm_path;
struct imm_seq;
struct protein;

/* A position inside a path, together with the sequence and protein needed to
 * interpret it. The pointers are stored as given; nothing here shows them
 * being freed through this struct.
 * NOTE(review): rendered diff without +/- markers; the three `struct imm_*`
 * value members look like removed lines, the two `int` members like their
 * replacements. */
struct match
{
struct imm_path const *path;
struct imm_seq const *sequence;
struct protein const *protein;
struct imm_step step;
struct imm_seq seq;
struct imm_codon codon;
// Index handed to imm_path_step() to fetch the current step.
int step;
// Offset in `sequence` where the current step's slice begins.
int sequence_position;
};

// clang-format off
struct match match_init(struct protein const *);
int match_setup(struct match *, struct imm_step, struct imm_seq);
int match_state_name(struct match const *, char *dst);
bool match_state_is_mute(struct match const *);
bool match_state_is_core(struct match const *);
int match_state_state_id(struct match const *);
char match_amino(struct match const *);
struct imm_codon match_codon(struct match const *);
// clang-format on
struct match match_begin(struct imm_path const *, struct imm_seq const *, struct protein const *);
struct match match_end(void);
bool match_equal(struct match, struct match);
struct match match_next(struct match const *);
int match_state_name(struct match const *, char *dst);
bool match_state_is_mutet(struct match const *);
bool match_state_is_core(struct match const *);
int match_state_id(struct match const *);
int match_amino(struct match const *, char *amino);
int match_codon(struct match const *, struct imm_codon *);

struct imm_seq match_subsequence(struct match const *);
struct imm_step const *match_step(struct match const *);

#endif
79 changes: 0 additions & 79 deletions c-core/match_iter.c

This file was deleted.

29 changes: 0 additions & 29 deletions c-core/match_iter.h

This file was deleted.

1 change: 1 addition & 0 deletions c-core/product.c
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ int product_close(struct product *x)
ok &= fputs("window\t", fp) >= 0;
ok &= fputs("window_start\t", fp) >= 0;
ok &= fputs("window_stop\t", fp) >= 0;
ok &= fputs("hit\t", fp) >= 0;
ok &= fputs("hit_start\t", fp) >= 0;
ok &= fputs("hit_stop\t", fp) >= 0;
ok &= fputs("profile\t", fp) >= 0;
Expand Down
1 change: 1 addition & 0 deletions c-core/product_line.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ void product_line_init(struct product_line *x)
x->window_start = 0;
x->window_stop = 0;

x->hit = 0;
x->hit_start = 0;
x->hit_stop = 0;

Expand Down
1 change: 1 addition & 0 deletions c-core/product_line.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ struct product_line
int window_start;
int window_stop;

int hit;
// [hit_start, hit_stop)
int hit_start;
int hit_stop;
Expand Down
Loading

0 comments on commit fd36079

Please sign in to comment.