Skip to content

Commit

Permalink
[ALS-6467] Configurable VCF excerpt info column ordering
Browse files Browse the repository at this point in the history
- Make column sorter that pulls from properties
- Use it to sort and exclude columns
  • Loading branch information
Luke Sikina committed Jul 8, 2024
1 parent eb44930 commit b088ba7
Show file tree
Hide file tree
Showing 3 changed files with 94 additions and 3 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
package edu.harvard.hms.dbmi.avillach.hpds.processing;

import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

/**
 * Orders and filters info columns according to the comma-separated
 * {@code variant.info_column_order} property.
 *
 * <p>When no usable column names are configured, {@link #sortInfoColumns(List)} is a no-op so
 * that deployments without the property keep their existing column order.
 */
@Component
public class ColumnSorter {
    /** Configured column name mapped to its position in the configured ordering. */
    private final Map<String, Integer> infoColumnsOrder;

    /**
     * Builds the sorter from the configured column order.
     *
     * @param infoColumnOrder column names in the desired output order; may be null or empty.
     *                        Entries are trimmed, and null or blank entries are ignored.
     */
    @Autowired
    public ColumnSorter(@Value("#{'${variant.info_column_order:}'.split(',')}") List<String> infoColumnOrder) {
        HashMap<String, Integer> order = new HashMap<>();
        if (infoColumnOrder != null) {
            for (int i = 0; i < infoColumnOrder.size(); i++) {
                String rawName = infoColumnOrder.get(i);
                if (rawName == null) {
                    continue;
                }
                String name = rawName.trim();
                // "".split(",") yields [""], so an unset property arrives here as a single
                // empty entry. Skipping blanks keeps the map empty in that case, which is what
                // the no-op check in sortInfoColumns relies on.
                if (name.isEmpty()) {
                    continue;
                }
                order.put(name, i);
            }
        }
        this.infoColumnsOrder = order;
    }

    /**
     * Returns the given columns restricted to the configured ones, in the configured order.
     *
     * @param columns the available info column names
     * @return {@code columns} itself when no ordering is configured; otherwise a new list
     *         containing only the configured columns, sorted by their configured position
     */
    public List<String> sortInfoColumns(List<String> columns) {
        // backwards compatibility check.
        if (infoColumnsOrder.isEmpty()) {
            return columns;
        }
        return columns.stream()
            .filter(infoColumnsOrder::containsKey)
            // every element that survives the filter has a configured position
            .sorted((a, b) -> Integer.compare(infoColumnsOrder.get(a), infoColumnsOrder.get(b)))
            .collect(Collectors.toList());
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import java.util.*;
import java.util.stream.Collectors;

import org.checkerframework.checker.units.qual.C;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand All @@ -17,6 +18,7 @@
import edu.harvard.hms.dbmi.avillach.hpds.data.query.Query;
import edu.harvard.hms.dbmi.avillach.hpds.exception.NotEnoughMemoryException;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;

@Component
Expand All @@ -34,12 +36,14 @@ public class VariantListProcessor implements HpdsProcessor {
private final int CACHE_SIZE;

private final AbstractProcessor abstractProcessor;
private final ColumnSorter columnSorter;


@Autowired
public VariantListProcessor(AbstractProcessor abstractProcessor) {
public VariantListProcessor(AbstractProcessor abstractProcessor, ColumnSorter columnSorter) {
this.abstractProcessor = abstractProcessor;
this.metadataIndex = VariantMetadataIndex.createInstance(VariantMetadataIndex.VARIANT_METADATA_BIN_FILE);
this.columnSorter = columnSorter;

VCF_EXCERPT_ENABLED = "TRUE".equalsIgnoreCase(System.getProperty("VCF_EXCERPT_ENABLED", "FALSE"));
//always enable aggregate queries if full queries are permitted.
Expand All @@ -54,6 +58,7 @@ public VariantListProcessor(AbstractProcessor abstractProcessor) {
public VariantListProcessor(boolean isOnlyForTests, AbstractProcessor abstractProcessor) {
this.abstractProcessor = abstractProcessor;
this.metadataIndex = null;
this.columnSorter = new ColumnSorter(List.of());

VCF_EXCERPT_ENABLED = "TRUE".equalsIgnoreCase(System.getProperty("VCF_EXCERPT_ENABLED", "FALSE"));
//always enable aggregate queries if full queries are permitted.
Expand Down Expand Up @@ -172,8 +177,9 @@ public String runVcfExcerptQuery(Query query, boolean includePatientData) throws
//5 columns for gene info
builder.append("CHROM\tPOSITION\tREF\tALT");

List<String> infoStoreColumns = columnSorter.sortInfoColumns(abstractProcessor.getInfoStoreColumns());
//now add the variant metadata column headers
for(String key : abstractProcessor.getInfoStoreColumns()) {
for(String key : infoStoreColumns) {
builder.append("\t" + key);
}

Expand Down Expand Up @@ -251,7 +257,7 @@ public String runVcfExcerptQuery(Query query, boolean includePatientData) throws
}

//need to make sure columns are pushed out in the right order; use same iterator as headers
for(String key : abstractProcessor.getInfoStoreColumns()) {
for(String key : infoStoreColumns) {
Set<String> columnMeta = variantColumnMap.get(key);
if(columnMeta != null) {
//collect our sets to a single entry
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
package edu.harvard.hms.dbmi.avillach.hpds.processing;


import org.junit.Assert;
import org.junit.Test;

import java.util.ArrayList;
import java.util.List;

/**
 * Unit tests for {@link ColumnSorter}: ordering, exclusion of unconfigured columns,
 * tolerance of configured-but-absent columns, and the no-configuration pass-through.
 */
public class ColumnSorterTest {

    /**
     * Creates a sorter with the given configured order and applies it to a mutable copy
     * of the given columns.
     */
    private static List<String> sortWith(List<String> configuredOrder, List<String> columns) {
        ColumnSorter sorter = new ColumnSorter(configuredOrder);
        return sorter.sortInfoColumns(new ArrayList<>(columns));
    }

    @Test
    public void shouldSortColumns() {
        List<String> result = sortWith(List.of("a", "b", "c"), List.of("b", "c", "a"));

        Assert.assertEquals(List.of("a", "b", "c"), result);
    }

    @Test
    public void shouldExcludeMissingColumns() {
        // "d" is not part of the configured order, so it must be dropped
        List<String> result = sortWith(List.of("a", "b", "c"), List.of("d", "b", "c", "a"));

        Assert.assertEquals(List.of("a", "b", "c"), result);
    }

    @Test
    public void shouldNotBreakForMissingColumns() {
        // "b" and "c" are configured but absent from the input
        List<String> result = sortWith(List.of("a", "b", "c", "d"), List.of("d", "a"));

        Assert.assertEquals(List.of("a", "d"), result);
    }

    @Test
    public void shouldNoOpWithoutConfig() {
        // with no configured order the input comes back in its original order
        List<String> result = sortWith(List.of(), List.of("b", "c", "a"));

        Assert.assertEquals(List.of("b", "c", "a"), result);
    }
}

0 comments on commit b088ba7

Please sign in to comment.