Skip to content

Commit

Permalink
Added tests for structured search
Browse files Browse the repository at this point in the history
  • Loading branch information
Adrian Herrmann committed Aug 8, 2024
1 parent e461766 commit 5e3c6ec
Show file tree
Hide file tree
Showing 4 changed files with 218 additions and 34 deletions.
1 change: 1 addition & 0 deletions app/models/fingerprint.rb
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ class Fingerprint < ApplicationRecord
fp_vector = Chemotion::OpenBabelService.bin_fingerprint_from_molfile(molfile) if molfile
threshold = threshold.to_f
query_num_set_bits = count_bits_set(fp_vector)
return none unless query_num_set_bits.positive?

sim_query = sanitize_sql_for_conditions(
[sql_query_similar, query_num_set_bits] + fp_vector + [
Expand Down
23 changes: 23 additions & 0 deletions db/schema.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,12 @@
ActiveRecord::Schema.define(version: 2024_07_09_095243) do

# These are extensions that must be enabled in order to support this database
enable_extension "btree_gist"
enable_extension "hstore"
enable_extension "pg_trgm"
enable_extension "pgcrypto"
enable_extension "plpgsql"
enable_extension "rdkit"
enable_extension "uuid-ossp"

create_table "affiliations", id: :serial, force: :cascade do |t|
Expand Down Expand Up @@ -1701,11 +1703,32 @@
and l.element_type = $1 and l.element_id = $2
$function$
SQL
# Defines the PL/pgSQL trigger function public.set_samples_mol_rdkit(), which
# mirrors the triggering row's molfile into the rdk.mols table:
#   * INSERT: inserts (new.id, mol_from_ctab(...)) built from the molfile after it
#     is passed through encode(..., 'escape') and cast to cstring.
#   * UPDATE: rewrites rdk.mols.m for the row's id, but only when the molfile changed.
# The function returns NEW without modifying it.
# NOTE(review): `new.MOLFILE <> old.MOLFILE` evaluates to NULL (not true) when either
# side is NULL, so a change to or from a NULL molfile does not refresh rdk.mols;
# confirm this is intended (IS DISTINCT FROM would cover that case).
# NOTE(review): the UPDATE branch only touches an existing rdk.mols row; rows that
# have no rdk.mols entry are presumably backfilled elsewhere - verify.
create_function :set_samples_mol_rdkit, sql_definition: <<-'SQL'
CREATE OR REPLACE FUNCTION public.set_samples_mol_rdkit()
RETURNS trigger
LANGUAGE plpgsql
AS $function$
begin
if (TG_OP='INSERT') then
insert into rdk.mols values (new.id, mol_from_ctab(encode(new.molfile, 'escape')::cstring));
end if;
if (TG_OP='UPDATE') then
if new.MOLFILE <> old.MOLFILE then
update rdk.mols set m = mol_from_ctab(encode(new.molfile, 'escape')::cstring) where id = new.id;
end if;
end if;
return new;
end
$function$
SQL


# Registers trigger update_users_matrix_trg: after every row-level INSERT or UPDATE
# on public.matrices, the database calls update_users_matrix().
create_trigger :update_users_matrix_trg, sql_definition: <<-SQL
CREATE TRIGGER update_users_matrix_trg AFTER INSERT OR UPDATE ON public.matrices FOR EACH ROW EXECUTE FUNCTION update_users_matrix()
SQL
# Registers trigger set_samples_mol_rdkit_trg: before every row-level INSERT or UPDATE
# on public.samples, the database calls set_samples_mol_rdkit().
# NOTE(review): because this is a BEFORE trigger, the function writes to rdk.mols
# before the samples row itself is stored - confirm no constraint on rdk.mols
# depends on the samples row already existing.
create_trigger :set_samples_mol_rdkit_trg, sql_definition: <<-SQL
CREATE TRIGGER set_samples_mol_rdkit_trg BEFORE INSERT OR UPDATE ON public.samples FOR EACH ROW EXECUTE FUNCTION set_samples_mol_rdkit()
SQL

create_view "v_samples_collections", sql_definition: <<-SQL
SELECT cols.id AS cols_id,
Expand Down
213 changes: 179 additions & 34 deletions spec/api/chemotion/search_api_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,19 @@

require 'rails_helper'

# rubocop:disable RSpec/MultipleMemoizedHelpers, RSpec/MultipleExpectations
# rubocop:disable RSpec/MultipleMemoizedHelpers, RSpec/MultipleExpectations, RSpec/NestedGroups

describe Chemotion::SearchAPI do
include_context 'api request authorization context'

let(:other_user) { create(:person) }
let(:collection) { create(:collection, user: user) }
let(:other_collection) { create(:collection, user: other_user) }
let(:other_collection) { create(:collection, user: user) }
let(:sample_a) { create(:sample, name: 'SampleA', creator: user) }
let(:sample_b) { create(:sample, name: 'SampleB', creator: user) }
let(:sample_c) { create(:sample, name: 'SampleC', creator: other_user) }
let(:sample_d) { create(:sample, name: 'SampleD', creator: other_user) }
let(:sample_c) { create(:sample, name: 'SampleC', creator: user) }
let(:sample_d) { create(:sample, name: 'SampleD', creator: user) }
let(:sample_e) { create(:sample, name: 'Methonol', creator: user, molfile: mof3000_2) }
let(:sample_f) { create(:sample, name: 'Dekan', creator: user, molfile: mof3000_1) }
let(:wellplate) { create(:wellplate, name: 'Wellplate', wells: [build(:well, sample: sample_a)]) }
let(:other_wellplate) { create(:wellplate, name: 'Other Wellplate', wells: [build(:well, sample: sample_b)]) }
let(:reaction) { create(:reaction, name: 'Reaction', samples: [sample_a, sample_b], creator: user) }
Expand All @@ -38,10 +39,12 @@
end

let(:reaction_with_duration) { create(:reaction, name: 'invalid Reaction', creator: user, duration: '1.33 Day(s)') }
let(:other_reaction) { create(:reaction, name: 'Other Reaction', samples: [sample_c, sample_d], creator: other_user) }
let(:other_reaction) { create(:reaction, name: 'Other Reaction', samples: [sample_c, sample_d], creator: user) }
let(:screen) { create(:screen, name: 'Screen') }
let(:other_screen) { create(:screen, name: 'Other Screen') }
let!(:cell_line) { create(:cellline_sample, name: 'another-cellline-search-example', collections: [collection]) }
let!(:mof3000_1) { Rails.root.join('spec/fixtures/mof_v3000_1.mol').read }
let!(:mof3000_2) { Rails.root.join('spec/fixtures/mof_v3000_2.mol').read }

before do
CollectionsReaction.create!(reaction: reaction, collection: collection)
Expand All @@ -52,6 +55,8 @@
CollectionsReaction.create!(reaction: reaction_with_negative_temperature, collection: collection)
CollectionsReaction.create!(reaction: invalid_reaction_with_temperature, collection: collection)
CollectionsSample.create!(sample: sample_a, collection: collection)
CollectionsSample.create!(sample: sample_e, collection: collection)
CollectionsSample.create!(sample: sample_f, collection: collection)
CollectionsScreen.create!(screen: screen, collection: collection)
CollectionsWellplate.create!(wellplate: wellplate, collection: collection)
ScreensWellplate.create!(wellplate: wellplate, screen: screen)
Expand Down Expand Up @@ -278,37 +283,177 @@

describe 'POST /api/v1/search/structure' do
let(:url) { '/api/v1/search/structure' }
let(:params) do
{
selection: {
elementType: :structure,
molfile: molfile,
search_type: 'sub',
tanimoto_threshold: 0.7,
search_by_method: :structure,
structure_search: true,
},
collection_id: collection.id,
page: 1,
per_page: 15,
molecule_sort: true,
}

# Structure search with search_type 'similar' and a tanimoto_threshold of 0.7,
# scoped to `collection`. Each nested context only supplies `molfile`.
# NOTE(review): no request is issued inside this context; the examples read
# `response` directly, so the POST is presumably sent by a `before` hook outside
# this block - verify.
context 'when search_by_fingerprint_sim' do
# Request payload shared by both nested contexts; `molfile` is defined per context.
let(:params) do
{
selection: {
elementType: :structure,
molfile: molfile,
search_type: 'similar',
tanimoto_threshold: 0.7,
search_by_method: :structure,
structure_search: true,
},
collection_id: collection.id,
page: 1,
per_page: 15,
molecule_sort: true,
}
end

# Queries with sample_a's molfile and expects an empty result for every element type.
# NOTE(review): presumably this exercises the "no fingerprint bits set" early
# return in Fingerprint's similarity search - confirm against the model.
context 'when molecule is too small' do
let(:molfile) { sample_a.molfile }

it 'returns nothing found' do
result = JSON.parse(response.body)

expect(result.dig('reactions', 'totalElements')).to eq 0
expect(result.dig('samples', 'totalElements')).to eq 0
expect(result.dig('screens', 'totalElements')).to eq 0
expect(result.dig('wellplates', 'totalElements')).to eq 0
end
end

# Queries with sample_e's molfile and expects sample_e and sample_a (in that
# order) plus the reaction, screen and wellplate from the requested collection.
context 'when molecule is big enough' do
let(:molfile) { sample_e.molfile }

it 'returns the sample' do
result = JSON.parse(response.body)

expect(result.dig('reactions', 'totalElements')).to eq 1
expect(result.dig('reactions', 'ids')).to eq [reaction.id]
expect(result.dig('samples', 'totalElements')).to eq 2
expect(result.dig('samples', 'ids')).to eq [sample_e.id, sample_a.id]
expect(result.dig('screens', 'totalElements')).to eq 1
expect(result.dig('screens', 'ids')).to eq [screen.id]
expect(result.dig('wellplates', 'totalElements')).to eq 1
expect(result.dig('wellplates', 'ids')).to eq [wellplate.id]
end
end
end

context 'when searching a molfile in samples in correct collection' do
let(:molfile) { sample_a.molfile }
context 'when search_by_fingerprint_sub' do
context 'when searching a molfile in samples in correct collection' do
let(:molfile) { sample_a.molfile }

it 'returns the sample and all other objects referencing the sample from the requested collection' do
result = JSON.parse(response.body)
let(:params) do
{
selection: {
elementType: :structure,
molfile: molfile,
search_type: 'sub',
search_by_method: :structure,
structure_search: true,
},
collection_id: collection.id,
page: 1,
per_page: 15,
molecule_sort: true,
}
end

it 'returns the sample and all other objects referencing the sample from the requested collection' do
result = JSON.parse(response.body)
expect(result.dig('reactions', 'totalElements')).to eq 1
expect(result.dig('reactions', 'ids')).to eq [reaction.id]
expect(result.dig('samples', 'totalElements')).to eq 2
expect(result.dig('samples', 'ids')).to eq [sample_e.id, sample_a.id]
expect(result.dig('screens', 'totalElements')).to eq 1
expect(result.dig('screens', 'ids')).to eq [screen.id]
expect(result.dig('wellplates', 'totalElements')).to eq 1
expect(result.dig('wellplates', 'ids')).to eq [wellplate.id]
end
end

expect(result.dig('reactions', 'totalElements')).to eq 1
expect(result.dig('reactions', 'ids')).to eq [reaction.id]
expect(result.dig('samples', 'totalElements')).to eq 1
expect(result.dig('samples', 'ids')).to eq [sample_a.id]
expect(result.dig('screens', 'totalElements')).to eq 1
expect(result.dig('screens', 'ids')).to eq [screen.id]
expect(result.dig('wellplates', 'totalElements')).to eq 1
expect(result.dig('wellplates', 'ids')).to eq [wellplate.id]
# Substructure search (search_type 'sub') for the mof3000_1 structure, but scoped to
# other_collection. mof3000_1 is the molfile of sample_f, which the visible setup
# attaches only to `collection`, so every element type is expected to be empty.
context 'when searching a molfile in samples in wrong collection' do
let(:molfile) { mof3000_1 }
# Same payload shape as the correct-collection case, except collection_id points
# at other_collection.
let(:params) do
{
selection: {
elementType: :structure,
molfile: molfile,
search_type: 'sub',
search_by_method: :structure,
structure_search: true,
},
collection_id: other_collection.id,
page: 1,
per_page: 15,
molecule_sort: true,
}
end

it 'returns nothing found' do
result = JSON.parse(response.body)

expect(result.dig('reactions', 'totalElements')).to eq 0
expect(result.dig('samples', 'totalElements')).to eq 0
expect(result.dig('screens', 'totalElements')).to eq 0
expect(result.dig('wellplates', 'totalElements')).to eq 0
end
end
end

# Structure search with search_type 'subRDKit'.
# NOTE(review): presumably this routes to the RDKit-backed substructure search
# (rdk.mols) rather than the fingerprint search - confirm in the API implementation.
# NOTE(review): the examples read `response` without issuing a request here; the
# POST is presumably sent by a `before` hook outside this block - verify.
context 'when search_by_rdkit_sub' do
# Queries `collection` with sample_a's molfile; expects sample_e and sample_a (in
# that order) plus the reaction, screen and wellplate from that collection.
context 'when searching a molfile in samples in correct collection' do
let(:molfile) { sample_a.molfile }

let(:params) do
{
selection: {
elementType: :structure,
molfile: molfile,
search_type: 'subRDKit',
search_by_method: :structure,
structure_search: true,
},
collection_id: collection.id,
page: 1,
per_page: 15,
molecule_sort: true,
}
end

it 'returns the sample and all other objects referencing the sample from the requested collection' do
result = JSON.parse(response.body)
expect(result.dig('reactions', 'totalElements')).to eq 1
expect(result.dig('reactions', 'ids')).to eq [reaction.id]
expect(result.dig('samples', 'totalElements')).to eq 2
expect(result.dig('samples', 'ids')).to eq [sample_e.id, sample_a.id]
expect(result.dig('screens', 'totalElements')).to eq 1
expect(result.dig('screens', 'ids')).to eq [screen.id]
expect(result.dig('wellplates', 'totalElements')).to eq 1
expect(result.dig('wellplates', 'ids')).to eq [wellplate.id]
end
end

# Queries other_collection with the mof3000_1 structure (sample_f's molfile, which
# the visible setup attaches only to `collection`); expects no hits of any type.
context 'when searching a molfile in samples in wrong collection' do
let(:molfile) { mof3000_1 }
let(:params) do
{
selection: {
elementType: :structure,
molfile: molfile,
search_type: 'subRDKit',
search_by_method: :structure,
structure_search: true,
},
collection_id: other_collection.id,
page: 1,
per_page: 15,
molecule_sort: true,
}
end

it 'returns nothing found' do
result = JSON.parse(response.body)

expect(result.dig('reactions', 'totalElements')).to eq 0
expect(result.dig('samples', 'totalElements')).to eq 0
expect(result.dig('screens', 'totalElements')).to eq 0
expect(result.dig('wellplates', 'totalElements')).to eq 0
end
end
end
end
Expand Down Expand Up @@ -402,4 +547,4 @@
pending 'TODO: Add missing spec'
end
end
# rubocop:enable RSpec/MultipleMemoizedHelpers, RSpec/MultipleExpectations
# rubocop:enable RSpec/MultipleMemoizedHelpers, RSpec/MultipleExpectations, RSpec/NestedGroups
15 changes: 15 additions & 0 deletions spec/fixtures/mof_v3000_2.mol
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@

-INDIGO-08072414562D

0 0 0 0 0 0 0 0 0 0 0 V3000
M V30 BEGIN CTAB
M V30 COUNTS 2 1 0 0 0
M V30 BEGIN ATOM
M V30 1 O 22.9 -5.45 0.0 0
M V30 2 C 23.9 -5.45 0.0 0
M V30 END ATOM
M V30 BEGIN BOND
M V30 1 1 1 2
M V30 END BOND
M V30 END CTAB
M END

0 comments on commit 5e3c6ec

Please sign in to comment.