From b088ba73fddf680c9d485151ed619e8ed6253425 Mon Sep 17 00:00:00 2001
From: Luke Sikina
Date: Fri, 14 Jun 2024 13:53:49 -0400
Subject: [PATCH] [ALS-6467] Configurable VCF excerpt info column ordering

- Make column sorter that pulls from properties
- Use it to sort and exclude columns
- Ignore blank entries so an unset property keeps the existing column order
---
 .../hpds/processing/ColumnSorter.java         | 57 +++++++++++++++++++
 .../hpds/processing/VariantListProcessor.java | 12 +++-
 .../hpds/processing/ColumnSorterTest.java     | 57 +++++++++++++++++++
 3 files changed, 123 insertions(+), 3 deletions(-)
 create mode 100644 processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/ColumnSorter.java
 create mode 100644 processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/ColumnSorterTest.java

diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/ColumnSorter.java b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/ColumnSorter.java
new file mode 100644
index 00000000..c278790d
--- /dev/null
+++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/ColumnSorter.java
@@ -0,0 +1,57 @@
+package edu.harvard.hms.dbmi.avillach.hpds.processing;
+
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.stereotype.Component;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+/**
+ * Orders and filters VCF excerpt info columns using the comma separated
+ * {@code variant.info_column_order} property.
+ */
+@Component
+public class ColumnSorter {
+    private final Map<String, Integer> infoColumnsOrder;
+
+    /**
+     * @param infoColumnOrder column names in the order they should be emitted. Entries are
+     *                        trimmed and blank ones are skipped: when the property is unset,
+     *                        {@code ''.split(',')} yields a single empty string rather than
+     *                        an empty list, and keeping it would defeat the no-op check in
+     *                        {@link #sortInfoColumns(List)} and filter out every column.
+     */
+    @Autowired
+    public ColumnSorter(@Value("#{'${variant.info_column_order:}'.split(',')}") List<String> infoColumnOrder) {
+        HashMap<String, Integer> order = new HashMap<>();
+        for (int i = 0; i < infoColumnOrder.size(); i++) {
+            String column = infoColumnOrder.get(i).trim();
+            if (!column.isEmpty()) {
+                order.put(column, i);
+            }
+        }
+        this.infoColumnsOrder = order;
+    }
+
+    /**
+     * @param columns the available info column names
+     * @return {@code columns} itself when no order is configured; otherwise a new list holding
+     *         only the configured columns, arranged in the configured order
+     */
+    public List<String> sortInfoColumns(List<String> columns) {
+        // backwards compatibility check.
+        if (infoColumnsOrder.isEmpty()) {
+            return columns;
+        }
+        return columns.stream()
+            .filter(infoColumnsOrder::containsKey)
+            .sorted((a, b) -> Integer.compare(
+                infoColumnsOrder.getOrDefault(a, Integer.MAX_VALUE),
+                infoColumnsOrder.getOrDefault(b, Integer.MAX_VALUE)
+            ))
+            .collect(Collectors.toList());
+    }
+}
diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/VariantListProcessor.java b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/VariantListProcessor.java
index a40aa347..115b28fd 100644
--- a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/VariantListProcessor.java
+++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/VariantListProcessor.java
@@ -6,6 +6,7 @@
 import java.util.*;
 import java.util.stream.Collectors;
 
+import org.checkerframework.checker.units.qual.C;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -17,6 +18,7 @@
 import edu.harvard.hms.dbmi.avillach.hpds.data.query.Query;
 import edu.harvard.hms.dbmi.avillach.hpds.exception.NotEnoughMemoryException;
 import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.beans.factory.annotation.Value;
 import org.springframework.stereotype.Component;
 
 @Component
@@ -34,12 +36,14 @@ public class VariantListProcessor implements HpdsProcessor {
 	private final int CACHE_SIZE;
 
 	private final AbstractProcessor abstractProcessor;
+	private final ColumnSorter columnSorter;
 
 
 	@Autowired
-	public VariantListProcessor(AbstractProcessor abstractProcessor) {
+	public VariantListProcessor(AbstractProcessor abstractProcessor, ColumnSorter columnSorter) {
 		this.abstractProcessor = abstractProcessor;
 		this.metadataIndex = VariantMetadataIndex.createInstance(VariantMetadataIndex.VARIANT_METADATA_BIN_FILE);
+		this.columnSorter = columnSorter;
 
 		VCF_EXCERPT_ENABLED = "TRUE".equalsIgnoreCase(System.getProperty("VCF_EXCERPT_ENABLED", "FALSE"));
 		//always enable aggregate queries if full queries are permitted.
@@ -54,6 +58,7 @@ public VariantListProcessor(AbstractProcessor abstractProcessor) {
 	public VariantListProcessor(boolean isOnlyForTests, AbstractProcessor abstractProcessor) {
 		this.abstractProcessor = abstractProcessor;
 		this.metadataIndex = null;
+		this.columnSorter = new ColumnSorter(List.of());
 
 		VCF_EXCERPT_ENABLED = "TRUE".equalsIgnoreCase(System.getProperty("VCF_EXCERPT_ENABLED", "FALSE"));
 		//always enable aggregate queries if full queries are permitted.
@@ -172,8 +177,9 @@ public String runVcfExcerptQuery(Query query, boolean includePatientData) throws
 		//5 columns for gene info
 		builder.append("CHROM\tPOSITION\tREF\tALT");
 
+		List<String> infoStoreColumns = columnSorter.sortInfoColumns(abstractProcessor.getInfoStoreColumns());
 		//now add the variant metadata column headers
-		for(String key : abstractProcessor.getInfoStoreColumns()) {
+		for(String key : infoStoreColumns) {
 			builder.append("\t" + key);
 		}
 
@@ -251,7 +257,7 @@ public String runVcfExcerptQuery(Query query, boolean includePatientData) throws
 				}
 
 				//need to make sure columns are pushed out in the right order; use same iterator as headers
-				for(String key : abstractProcessor.getInfoStoreColumns()) {
+				for(String key : infoStoreColumns) {
 					Set<String> columnMeta = variantColumnMap.get(key);
 					if(columnMeta != null) {
 						//collect our sets to a single entry
diff --git a/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/ColumnSorterTest.java b/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/ColumnSorterTest.java
new file mode 100644
index 00000000..954ac9f8
--- /dev/null
+++ b/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/ColumnSorterTest.java
@@ -0,0 +1,57 @@
+package edu.harvard.hms.dbmi.avillach.hpds.processing;
+
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.ArrayList;
+import java.util.List;
+
+public class ColumnSorterTest {
+
+    @Test
+    public void shouldSortColumns() {
+        ColumnSorter subject = new ColumnSorter(List.of("a", "b", "c"));
+        List<String> actual = subject.sortInfoColumns(new ArrayList<>(List.of("b", "c", "a")));
+        List<String> expected = List.of("a", "b", "c");
+
+        Assert.assertEquals(expected, actual);
+    }
+
+    @Test
+    public void shouldExcludeMissingColumns() {
+        ColumnSorter subject = new ColumnSorter(List.of("a", "b", "c"));
+        List<String> actual = subject.sortInfoColumns(new ArrayList<>(List.of("d", "b", "c", "a")));
+        List<String> expected = List.of("a", "b", "c");
+
+        Assert.assertEquals(expected, actual);
+    }
+
+    @Test
+    public void shouldNotBreakForMissingColumns() {
+        ColumnSorter subject = new ColumnSorter(List.of("a", "b", "c", "d"));
+        List<String> actual = subject.sortInfoColumns(new ArrayList<>(List.of("d", "a")));
+        List<String> expected = List.of("a", "d");
+
+        Assert.assertEquals(expected, actual);
+    }
+
+    @Test
+    public void shouldNoOpWithoutConfig() {
+        ColumnSorter subject = new ColumnSorter(List.of());
+        List<String> actual = subject.sortInfoColumns(new ArrayList<>(List.of("b", "c", "a")));
+        List<String> expected = List.of("b", "c", "a");
+
+        Assert.assertEquals(expected, actual);
+    }
+
+    @Test
+    public void shouldNoOpWithBlankConfig() {
+        // ''.split(',') yields [""] when variant.info_column_order is unset
+        ColumnSorter subject = new ColumnSorter(List.of(""));
+        List<String> actual = subject.sortInfoColumns(new ArrayList<>(List.of("b", "c", "a")));
+        List<String> expected = List.of("b", "c", "a");
+
+        Assert.assertEquals(expected, actual);
+    }
+}
\ No newline at end of file