Skip to content

Commit

Permalink
Get rid of the query Sequence object and expose xdrop
Browse files Browse the repository at this point in the history
  • Loading branch information
milot-mirdita committed Jul 6, 2023
1 parent ef06379 commit 32d32a6
Show file tree
Hide file tree
Showing 5 changed files with 33 additions and 29 deletions.
40 changes: 21 additions & 19 deletions src/commons/BlockAligner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,12 +88,12 @@ LocalAln align_local(
size_t b_len, const char* b_str, const char* b_rev, PaddedBytes* b,
const AAMatrix* matrix, Gaps gaps,
size_t a_idx, size_t b_idx,
Cigar* cigar, SizeRange range
Cigar* cigar, SizeRange range, int32_t x_drop
) {
LocalAln res_aln;
AlignResult res;

int32_t x_drop = -(range.min * gaps.extend + gaps.open);
// int32_t x_drop = -(range.min * gaps.extend + gaps.open);

// forwards alignment starting at (a_idx, b_idx)
block_set_bytes_padded_aa(a, (uint8_t*)(a_str + a_idx), a_len - a_idx, range.max);
Expand Down Expand Up @@ -128,15 +128,15 @@ LocalAln align_local(
LocalAln align_local_profile(
BlockHandle block_trace, BlockHandle block_no_trace,
const size_t a_len, const char* a_str, const char* a_rev, PaddedBytes* a,
const size_t b_len, const int8_t* b_profile_matrix, AAProfile* b,
const size_t b_len, const char* b_str, AAProfile* b,
Gaps gaps, BaseMatrix& subMat,
size_t a_idx, size_t b_idx,
Cigar* cigar, SizeRange range
Cigar* cigar, SizeRange range, int32_t x_drop
) {
LocalAln res_aln;
AlignResult res;

int32_t x_drop = 10000; // -(range.min * gaps.extend + gaps.open);
// int32_t x_drop = 10000; // -(range.min * gaps.extend + gaps.open);
// forwards alignment starting at (a_idx, b_idx)
block_set_bytes_padded_aa(a, (uint8_t*)(a_str + a_idx), a_len - a_idx, range.max);

Expand All @@ -146,7 +146,7 @@ LocalAln align_local_profile(
for (size_t i = 0; i < (b_len - b_idx); ++i) {
// printf("%d ", b_idx + i);
for (uint8_t j = 0; j < Sequence::PROFILE_AA_SIZE; ++j) {
volatile int val = b_profile_matrix[(j * b_len) + (b_idx + i)];
volatile int val = static_cast<int8_t>(static_cast<short>(b_str[(b_idx + i) * Sequence::PROFILE_READIN_SIZE + j]) / 4);
// if (val < curr_max) {
// curr_max = val;
// }
Expand Down Expand Up @@ -195,7 +195,8 @@ LocalAln align_local_profile(
for (size_t i = 0; i < (b_len - b_idx); ++i) {
// printf("%d ", b_len - b_idx - i);
for (uint8_t j = 0; j < Sequence::PROFILE_AA_SIZE; ++j) {
volatile int val = b_profile_matrix[(j * b_len) + (b_len - b_idx - i)];
// volatile int val = b_profile_matrix[(j * b_len) + (b_len - b_idx - i)];
volatile int val = static_cast<int8_t>(static_cast<short>(b_str[(b_len - b_idx - i) * Sequence::PROFILE_READIN_SIZE + j]) / 4);
// if (val < curr_max) {
// curr_max = val;
// }
Expand Down Expand Up @@ -233,19 +234,20 @@ LocalAln align_local_profile(


Matcher::result_t
BlockAligner::align(Sequence &query,
DistanceCalculator::LocalAlignment alignment,
EvalueComputation *evaluer,
int xdrop,
BaseMatrix* subMat) {
unsigned int qKey = query.getDbKey();
size_t b_len = query.L;
const char* b_str = query.getSeqData();
BlockAligner::align(
const char* querySeq,
unsigned int queryLength,
DistanceCalculator::LocalAlignment alignment,
EvalueComputation *evaluer,
int xdrop,
BaseMatrix* subMat
) {
size_t b_len = queryLength;
const char* b_str = querySeq;
if (dbtype == Parameters::DBTYPE_AMINO_ACIDS) {
SRAUtil::strrev(querySeqRev, b_str, b_len);
}
const char* b_rev = querySeqRev;
const int8_t *b_profile_matrix = query.getAlignmentProfile();

size_t a_len = targetLength;
const char* a_str = targetSeq;
Expand Down Expand Up @@ -275,9 +277,9 @@ BlockAligner::align(Sequence &query,

LocalAln local_aln;
if (dbtype == Parameters::DBTYPE_AMINO_ACIDS) {
local_aln = align_local(blockTrace, blockNoTrace, a_len, a_str, a_rev, a, b_len, b_str, b_rev, b, &BLOSUM62, gaps, qUngappedEndPos, dbUngappedEndPos, cigar, range);
local_aln = align_local(blockTrace, blockNoTrace, a_len, a_str, a_rev, a, b_len, b_str, b_rev, b, &BLOSUM62, gaps, qUngappedEndPos, dbUngappedEndPos, cigar, range, xdrop);
} else {
local_aln = align_local_profile(blockTrace, blockNoTrace, a_len, a_str, a_rev, a, b_len, b_profile_matrix, bProfile, gaps, *subMat, qUngappedEndPos, dbUngappedEndPos, cigar, range);
local_aln = align_local_profile(blockTrace, blockNoTrace, a_len, a_str, a_rev, a, b_len, b_str, bProfile, gaps, *subMat, qUngappedEndPos, dbUngappedEndPos, cigar, range, xdrop);
}
// printf("a: %s\nb: %s\nscore: %d\nstart idx: (%lu, %lu)\nend idx: (%lu, %lu)\n",
// a_str,
Expand Down Expand Up @@ -346,7 +348,7 @@ BlockAligner::align(Sequence &query,
// EXIT(EXIT_FAILURE);

return Matcher::result_t(
qKey, bitScore, qcov, dbcov, seqId, evalue, alnLength,
0, bitScore, qcov, dbcov, seqId, evalue, alnLength,
local_aln.a_start, local_aln.a_end, a_len, local_aln.b_start, local_aln.b_end, b_len,
backtrace
);
Expand Down
3 changes: 2 additions & 1 deletion src/commons/BlockAligner.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ class BlockAligner {
void initTarget(Sequence &target);

Matcher::result_t align(
Sequence &query,
const char* querySeq,
unsigned int queryLength,
DistanceCalculator::LocalAlignment alignment,
EvalueComputation *evaluer,
int xdrop,
Expand Down
2 changes: 1 addition & 1 deletion src/commons/LocalParameters.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ class LocalParameters : public Parameters {
unsigned int requiredKmerMatches;

PARAMETER(PARAM_X_DROP)
int8_t xdrop;
unsigned int xdrop;

PARAMETER(PARAM_RANGE_MIN)
uintptr_t rangeMin;
Expand Down
11 changes: 6 additions & 5 deletions src/sra/blockalign.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,6 @@ int blockalign(int argc, const char **argv, const Command &command) {
#ifdef OPENMP
thread_idx = static_cast<unsigned int>(omp_get_thread_num());
#endif
Sequence querySeq(par.maxSeqLen, querySequenceReader.getDbtype(), subMat, useProfileSearch ? 0 : par.kmerSize, false, false, !useProfileSearch);
Sequence targetSeq(par.maxSeqLen, seqType, subMat, par.kmerSize, par.spacedKmer, false, false, par.spacedKmerPattern);

Indexer idx(subMat->alphabetSize - 1, par.kmerSize);
Expand All @@ -223,8 +222,10 @@ int blockalign(int argc, const char **argv, const Command &command) {
par.maxSeqLen, par.rangeMin, par.rangeMax,
isNucDB ? -par.gapOpen.values.nucleotide() : -par.gapOpen.values.aminoacid(),
isNucDB ? -par.gapExtend.values.nucleotide() : -par.gapExtend.values.aminoacid(),
querySeq.getSeqType()
querySequenceReader.getDbtype()
);

// Sequence querySeq(par.maxSeqLen, querySequenceReader.getDbtype(), subMat, 0, false, false, false);
// Matcher matcher(querySeq.getSeqType(), targetSeq.getSeqType(), par.maxSeqLen, subMat, &evaluer, false, 1.0, par.gapOpen.values.aminoacid(), par.gapExtend.values.aminoacid(), 1.0, 0);

char buffer[1024];
Expand Down Expand Up @@ -304,7 +305,7 @@ int blockalign(int argc, const char **argv, const Command &command) {

if (useProfileSearch) {
realSeq.clear();
querySeq.extractProfileSequence(querySeqData, queryEntryLen - 1, *subMat, realSeq);
Sequence::extractProfileSequence(querySeqData, queryEntryLen - 1, *subMat, realSeq);
if (realSeq.length() != querySeqLen) {
Debug(Debug::ERROR) << "Query sequence length is wrong!\n"
<< "Correct count: " << correct_count << "\n"
Expand Down Expand Up @@ -360,10 +361,10 @@ int blockalign(int argc, const char **argv, const Command &command) {
isBlockAlignerInit = true;
}

querySeq.mapSequence(queryId, queryKey, querySeqData, querySeqLen);
// querySeq.mapSequence(queryId, queryKey, querySeqData, querySeqLen);
// matcher.initQuery(&querySeq);
// Matcher::result_t res = matcher.getSWResult(&targetSeq, INT_MAX, false, 0, 0.0, par.evalThr, Matcher::SCORE_COV_SEQID, 0, false);
Matcher::result_t res = blockAligner.align(querySeq, aln, &evaluer, xdrop, subMat);
Matcher::result_t res = blockAligner.align(querySeqData, querySeqLen, aln, &evaluer, xdrop, subMat);
res.dbKey = targetKey;
res.queryOrfStartPos = queryKey;
alignmentsNum++;
Expand Down
6 changes: 3 additions & 3 deletions src/sra/playground.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -206,8 +206,8 @@ unsigned int qprof_1_len = 45026;

unsigned int seq_len = (qprof_1_len - 1) / Sequence::PROFILE_READIN_SIZE;

Sequence s1(65000, Parameters::DBTYPE_HMM_PROFILE, &subMat, kmer_size, true, true);
s1.mapSequence(0, 0, (const char*)qprof_1, seq_len);
// Sequence s1(65000, Parameters::DBTYPE_HMM_PROFILE, &subMat, kmer_size, true, true);
// s1.mapSequence(0, 0, (const char*)qprof_1, seq_len);
// s1.printProfile();

Sequence s2(65000, Parameters::DBTYPE_AMINO_ACIDS, &subMat, kmer_size, false, false);
Expand All @@ -230,7 +230,7 @@ unsigned int qprof_1_len = 45026;
aln.endPos = 0;
aln.diagonal = 0;
aln.distToDiagonal = 0;
Matcher::result_t res = blockAligner.align(s1, aln, &evaluer, 10000, &subMat);
Matcher::result_t res = blockAligner.align((const char*)qprof_1, seq_len, aln, &evaluer, 10000, &subMat);

std::cout << "Score: " << res.score << std::endl;
std::cout << "Eval: " << res.eval << std::endl;
Expand Down

0 comments on commit 32d32a6

Please sign in to comment.