From 69c68095b214381277623113252ec845560ee0aa Mon Sep 17 00:00:00 2001 From: Moriarty <22225248+apmoriarty@users.noreply.github.com> Date: Tue, 31 Oct 2023 11:13:32 -0400 Subject: [PATCH 01/32] TLDEquality refactored to perform an in-line byte comparison (#2151) * TLDEquality refactored to perform an in-line byte comparison instead of delegating to expensive TLD utility methods * Check for edge cases --- .../datawave/query/function/TLDEquality.java | 22 +++-- .../query/function/TLDEqualityTest.java | 87 ++++++++++--------- 2 files changed, 63 insertions(+), 46 deletions(-) diff --git a/warehouse/query-core/src/main/java/datawave/query/function/TLDEquality.java b/warehouse/query-core/src/main/java/datawave/query/function/TLDEquality.java index da2ec14fa6..56d32227f9 100644 --- a/warehouse/query-core/src/main/java/datawave/query/function/TLDEquality.java +++ b/warehouse/query-core/src/main/java/datawave/query/function/TLDEquality.java @@ -3,11 +3,9 @@ import org.apache.accumulo.core.data.ByteSequence; import org.apache.accumulo.core.data.Key; -import datawave.query.tld.TLD; - /** * A key equality implementation that compares to the root pointers of two doc Ids together. - * + *

* For example, two IDs `h1.h2.h3.a.b.c.d` and `h1.h2.h3.e.f` would be considered equal by this check. */ public class TLDEquality implements Equality { @@ -23,8 +21,20 @@ public class TLDEquality implements Equality { */ @Override public boolean partOf(Key key, Key other) { - ByteSequence docCF = TLD.estimateRootPointerFromId(key.getColumnFamilyData()); - ByteSequence otherCF = TLD.estimateRootPointerFromId(other.getColumnFamilyData()); - return otherCF.equals(docCF); + ByteSequence keyCf = key.getColumnFamilyData(); + ByteSequence otherCf = other.getColumnFamilyData(); + + int dotCount = 0; + int len = Math.min(keyCf.length(), otherCf.length()); + for (int i = 0; i < len; i++) { + byte a = keyCf.byteAt(i); + byte b = otherCf.byteAt(i); + if (a != b) { + return false; + } else if (a == '.' && ++dotCount == 3) { + return true; + } + } + return len != 0; } } diff --git a/warehouse/query-core/src/test/java/datawave/query/function/TLDEqualityTest.java b/warehouse/query-core/src/test/java/datawave/query/function/TLDEqualityTest.java index daa5fc19c7..c170a37ca3 100644 --- a/warehouse/query-core/src/test/java/datawave/query/function/TLDEqualityTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/function/TLDEqualityTest.java @@ -1,68 +1,75 @@ package datawave.query.function; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; import org.apache.accumulo.core.data.Key; -import org.junit.Test; +import org.junit.jupiter.api.Test; -public class TLDEqualityTest { +class TLDEqualityTest { + private final Key key = new Key("row", "datatype\0-7m7uk9.oz9qpy.-nfahrv"); + private final Key keyChildOne = new Key("row", "datatype\0-7m7uk9.oz9qpy.-nfahrv.123.987"); + private final Key keyChildTwo = new Key("row", "datatype\0-7m7uk9.oz9qpy.-nfahrv.123.777"); + private final Key otherKey = new Key("row", "datatype\0-7m7uk9.oz9qpy.-aaazzz"); + private final Key otherKeyChildOne = new Key("row", "datatype\0-7m7uk9.oz9qpy.-aaazzz.123.987"); - private TLDEquality equality = new TLDEquality(); + private final TLDEquality equality = new TLDEquality(); @Test - public void testSameParent() { - Key docKey = new Key("row", "parent.document.id"); - Key otherKey = new Key("row", "parent.document.id"); - assertTrue(equality.partOf(docKey, otherKey)); - assertTrue(equality.partOf(otherKey, docKey)); + void testSameParent() { + assertTrue(equality.partOf(key, key)); + assertTrue(equality.partOf(otherKey, otherKey)); } @Test - public void testDifferentParents() { - Key docKey = new Key("row", "parent.document.id"); - Key otherKey = new Key("row", "parent.document.id2"); - assertFalse(equality.partOf(docKey, otherKey)); - assertFalse(equality.partOf(otherKey, docKey)); + void testDifferentParents() { + assertFalse(equality.partOf(key, otherKey)); + assertFalse(equality.partOf(otherKey, key)); } @Test - public void testKeysOfDifferentDepths() { - Key docKey = new Key("row", "parent.document.id"); - Key otherKey = new Key("row", "parent.document.id.child"); - assertFalse(equality.partOf(docKey, otherKey)); - assertFalse(equality.partOf(otherKey, docKey)); + void testKeysOfDifferentDepths() { + assertTrue(equality.partOf(key, keyChildOne)); + assertTrue(equality.partOf(keyChildOne, key)); + + assertTrue(equality.partOf(otherKey, otherKeyChildOne)); + assertTrue(equality.partOf(otherKeyChildOne, otherKey)); + } + + @Test + void testSameParentSameChildren() { + 
assertTrue(equality.partOf(keyChildOne, keyChildOne)); + assertTrue(equality.partOf(keyChildTwo, keyChildTwo)); + + assertTrue(equality.partOf(otherKeyChildOne, otherKeyChildOne)); } @Test - public void testSameParentSameChildren() { - Key docKey = new Key("row", "parent.document.id.child"); - Key otherKey = new Key("row", "parent.document.id.child"); - assertTrue(equality.partOf(docKey, otherKey)); - assertTrue(equality.partOf(otherKey, docKey)); + void testSameParentDifferentChildren() { + assertTrue(equality.partOf(keyChildOne, keyChildTwo)); + assertTrue(equality.partOf(keyChildTwo, keyChildOne)); } @Test - public void testSameParentDifferentChildren() { - Key docKey = new Key("row", "parent.document.id.child"); - Key otherKey = new Key("row", "parent.document.id.child2"); - assertFalse(equality.partOf(docKey, otherKey)); - assertFalse(equality.partOf(otherKey, docKey)); + void testDifferentParentSameChildren() { + assertFalse(equality.partOf(keyChildOne, otherKeyChildOne)); + assertFalse(equality.partOf(otherKeyChildOne, keyChildOne)); } @Test - public void testDifferentParentSameChildren() { - Key docKey = new Key("row", "parent.document.id.child"); - Key otherKey = new Key("row", "parent.document.id2.child"); - assertFalse(equality.partOf(docKey, otherKey)); - assertFalse(equality.partOf(otherKey, docKey)); + void testDifferentParentDifferentChildren() { + assertFalse(equality.partOf(keyChildTwo, otherKeyChildOne)); + assertFalse(equality.partOf(otherKeyChildOne, keyChildTwo)); } @Test - public void testDifferentParentDifferentChildren() { - Key docKey = new Key("row", "parent.document.id.child"); - Key otherKey = new Key("row", "parent.document.id2.child2"); - assertFalse(equality.partOf(docKey, otherKey)); - assertFalse(equality.partOf(otherKey, docKey)); + void testEdgeCases() { + assertFalse(equality.partOf(key, new Key("", ""))); + assertFalse(equality.partOf(new Key("", ""), key)); + + // in practice this should never happen + Key malformedUid = new Key("row", "datatype\0-7m7uk9.oz9qpy.-"); + assertTrue(equality.partOf(key, malformedUid)); + assertTrue(equality.partOf(malformedUid, key)); } } From cff78e0f1ce4a3180c77661cfe046b4bf627e0ea Mon Sep 17 00:00:00 2001 From: palindrome <31748527+hlgp@users.noreply.github.com> Date: Wed, 1 Nov 2023 17:50:10 -0400 Subject: [PATCH 02/32] Adding more thorough edge unit tests ahead of forthcoming refactor to guarantee functionality will remain unchanged (#2154) Co-authored-by: hlgp --- .../handler/edge/EdgeHandlerTestUtil.java | 13 +- .../ProtobufEdgeDeletePreconditionTest.java | 6 +- .../edge/ProtobufEdgePreconditionTest.java | 172 +++++++++++++++++- 3 files changed, 178 insertions(+), 13 deletions(-) diff --git a/warehouse/ingest-core/src/test/java/datawave/ingest/mapreduce/handler/edge/EdgeHandlerTestUtil.java b/warehouse/ingest-core/src/test/java/datawave/ingest/mapreduce/handler/edge/EdgeHandlerTestUtil.java index 453dc9fd07..cb11e7362d 100644 --- a/warehouse/ingest-core/src/test/java/datawave/ingest/mapreduce/handler/edge/EdgeHandlerTestUtil.java +++ b/warehouse/ingest-core/src/test/java/datawave/ingest/mapreduce/handler/edge/EdgeHandlerTestUtil.java @@ -13,10 +13,13 @@ import org.apache.log4j.Logger; import org.junit.Assert; +import com.google.common.collect.ArrayListMultimap; import com.google.common.collect.HashMultimap; +import com.google.common.collect.ListMultimap; import com.google.common.collect.Maps; import com.google.common.collect.Multimap; +import datawave.edge.util.EdgeValue; import 
datawave.ingest.config.RawRecordContainerImpl; import datawave.ingest.data.RawRecordContainer; import datawave.ingest.data.config.NormalizedContentInterface; @@ -32,7 +35,8 @@ public class EdgeHandlerTestUtil { public static final Text edgeTableName = new Text(TableName.EDGE); public static final String NB = "\u0000"; - public static Set edgeKeyResults = new HashSet<>(); + public static ListMultimap edgeKeyResults = ArrayListMultimap.create(); + public static ListMultimap edgeValueResults = ArrayListMultimap.create(); private static Logger log = Logger.getLogger(EdgeHandlerTestUtil.class); @@ -65,6 +69,7 @@ public static void processEvent(Multimap even for (Map.Entry entry : contextWriter.getCache().entries()) { if (entry.getKey().getTableName().equals(edgeTableName)) { edgeKeys.add(entry.getKey().getKey()); + edgeValueResults.put(entry.getKey().getKey().getRow().toString().replaceAll(NB, "%00;"), EdgeValue.decode(entry.getValue()).toString()); } if (!entry.getKey().getTableName().equals(edgeTableName) || entry.getKey().getKey().isDeleted() == edgeDeleteMode) { if (countMap.containsKey(entry.getKey().getTableName())) { @@ -84,7 +89,11 @@ public static void processEvent(Multimap even // check edge keys for (Key k : edgeKeys) { - edgeKeyResults.add(k.getRow().toString().replaceAll(NB, "%00;")); + + String[] tempArr = {k.getColumnFamily().toString().replaceAll(NB, "%00;"), k.getColumnQualifier().toString().replaceAll(NB, "%00;"), + k.getColumnVisibility().toString(), String.valueOf(k.getTimestamp())}; + edgeKeyResults.put(k.getRow().toString().replaceAll(NB, "%00;"), tempArr); + keyPrint.add("edge key: " + k.getRow().toString().replaceAll(NB, "%00;") + " ::: " + k.getColumnFamily().toString().replaceAll(NB, "%00;") + " ::: " + k.getColumnQualifier().toString().replaceAll(NB, "%00;") + " ::: " + k.getColumnVisibility() + " ::: " + k.getTimestamp() + " ::: " + k.isDeleted() + "\n"); diff --git a/warehouse/ingest-core/src/test/java/datawave/ingest/mapreduce/handler/edge/ProtobufEdgeDeletePreconditionTest.java b/warehouse/ingest-core/src/test/java/datawave/ingest/mapreduce/handler/edge/ProtobufEdgeDeletePreconditionTest.java index 6a7918c210..4502b66896 100644 --- a/warehouse/ingest-core/src/test/java/datawave/ingest/mapreduce/handler/edge/ProtobufEdgeDeletePreconditionTest.java +++ b/warehouse/ingest-core/src/test/java/datawave/ingest/mapreduce/handler/edge/ProtobufEdgeDeletePreconditionTest.java @@ -102,7 +102,7 @@ public void testDeleteUnawarePreconSameGroup() { RawRecordContainer myEvent = getEvent(conf); EdgeHandlerTestUtil.processEvent(fields, edgeHandler, myEvent, 8, true, true); - Assert.assertEquals(expectedKeys, EdgeHandlerTestUtil.edgeKeyResults); + Assert.assertEquals(expectedKeys, EdgeHandlerTestUtil.edgeKeyResults.keySet()); } @@ -143,7 +143,7 @@ public void testDeleteUnawarePreconDifferentGroup() { EdgeHandlerTestUtil.processEvent(fields, edgeHandler, myEvent, 12, true, true); - Assert.assertEquals(expectedKeys, EdgeHandlerTestUtil.edgeKeyResults); + Assert.assertEquals(expectedKeys, EdgeHandlerTestUtil.edgeKeyResults.keySet()); } @@ -188,7 +188,7 @@ public void testDeleteUnawarePreconAndedDifferentGroup() { EdgeHandlerTestUtil.processEvent(fields, edgeHandler, myEvent, 12, true, true); - Assert.assertEquals(expectedKeys, EdgeHandlerTestUtil.edgeKeyResults); + Assert.assertEquals(expectedKeys, EdgeHandlerTestUtil.edgeKeyResults.keySet()); } diff --git a/warehouse/ingest-core/src/test/java/datawave/ingest/mapreduce/handler/edge/ProtobufEdgePreconditionTest.java 
b/warehouse/ingest-core/src/test/java/datawave/ingest/mapreduce/handler/edge/ProtobufEdgePreconditionTest.java index d0d5ba7b0c..ee0075800d 100644 --- a/warehouse/ingest-core/src/test/java/datawave/ingest/mapreduce/handler/edge/ProtobufEdgePreconditionTest.java +++ b/warehouse/ingest-core/src/test/java/datawave/ingest/mapreduce/handler/edge/ProtobufEdgePreconditionTest.java @@ -5,6 +5,8 @@ import java.time.Instant; import java.time.format.DateTimeFormatter; +import java.util.Collections; +import java.util.Date; import java.util.Enumeration; import java.util.HashSet; import java.util.Set; @@ -35,6 +37,7 @@ import datawave.ingest.mapreduce.SimpleDataTypeHandler; import datawave.ingest.mapreduce.job.BulkIngestKey; import datawave.ingest.time.Now; +import datawave.util.time.DateHelper; public class ProtobufEdgePreconditionTest { @@ -44,6 +47,7 @@ public class ProtobufEdgePreconditionTest { private static Type type = new Type("mycsv", FakeIngestHelper.class, null, new String[] {SimpleDataTypeHandler.class.getName()}, 10, null); private static final Now now = Now.getInstance(); private Configuration conf; + private String loadDateStr = DateHelper.format(new Date(now.get())); @Before public void setup() { @@ -62,6 +66,7 @@ public void setup() { fields.clear(); EdgeHandlerTestUtil.edgeKeyResults.clear(); + EdgeHandlerTestUtil.edgeValueResults.clear(); } private RawRecordContainer getEvent(Configuration conf) { @@ -70,6 +75,7 @@ private RawRecordContainer getEvent(Configuration conf) { myEvent.addSecurityMarking("columnVisibility", "PRIVATE"); myEvent.setDataType(type); myEvent.setId(UID.builder().newId()); + myEvent.setAltIds(Collections.singleton("0016dd72-0000-827d-dd4d-001b2163ba09")); myEvent.setConf(conf); Instant i = Instant.from(DateTimeFormatter.ISO_INSTANT.parse("2022-10-26T01:31:53Z")); @@ -107,7 +113,157 @@ public void testUnawarePreconSameGroup() { RawRecordContainer myEvent = getEvent(conf); EdgeHandlerTestUtil.processEvent(fields, edgeHandler, myEvent, 8, true, false); - Assert.assertEquals(expectedKeys, EdgeHandlerTestUtil.edgeKeyResults); + Assert.assertEquals(expectedKeys, EdgeHandlerTestUtil.edgeKeyResults.keySet()); + + // colFam + Assert.assertEquals("MY_EDGE_TYPE/TO-FROM", EdgeHandlerTestUtil.edgeKeyResults.get("guppy%00;siamese").get(0)[0]); + + // colQual + Assert.assertEquals("20221026/MY_CSV_DATA-MY_CSV_DATA///B", EdgeHandlerTestUtil.edgeKeyResults.get("guppy%00;siamese").get(0)[1]); + + // values + Assert.assertEquals(1, EdgeHandlerTestUtil.edgeValueResults.get("guppy%00;siamese").size()); + Assert.assertEquals( + "count: 1, bitmask: 2, sourceValue: guppy, sinkValue: siamese, hours: , duration: , loadDate: " + loadDateStr + + ", uuidString: , uuidObj: 0016dd72-0000-827d-dd4d-001b2163ba09, badActivityDate: ", + EdgeHandlerTestUtil.edgeValueResults.get("guppy%00;siamese").get(0)); + Assert.assertEquals(1, EdgeHandlerTestUtil.edgeValueResults.get("guppy").size()); + Assert.assertEquals( + "count: , bitmask: , sourceValue: guppy, sinkValue: , hours: [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], duration: , loadDate: " + + loadDateStr + ", uuidString: , uuidObj: 0016dd72-0000-827d-dd4d-001b2163ba09, badActivityDate: ", + EdgeHandlerTestUtil.edgeValueResults.get("guppy").get(0)); + + // vis and ts + Assert.assertEquals("PRIVATE", EdgeHandlerTestUtil.edgeKeyResults.get("guppy%00;siamese").get(0)[2]); + Assert.assertEquals("1666747913000", EdgeHandlerTestUtil.edgeKeyResults.get("guppy%00;siamese").get(0)[3]); + + } + + @Test + public void 
testUnawarePreconSameGroupEarlyActivityDate() { + // FELINE == 'tabby' + + fields.put("EVENT_DATE", new BaseNormalizedContent("EVENT_DATE", "2022-10-26T01:31:53Z")); + fields.put("UUID", new BaseNormalizedContent("UUID", "0016dd72-0000-827d-dd4d-001b2163ba09")); + fields.put("FELINE", new NormalizedFieldAndValue("FELINE", "tabby", "PET", "0")); + fields.put("FELINE", new NormalizedFieldAndValue("FELINE", "siamese", "PET", "1")); + fields.put("FISH", new NormalizedFieldAndValue("FISH", "salmon", "PET", "0")); + fields.put("FISH", new NormalizedFieldAndValue("FISH", "guppy", "PET", "1")); + fields.put("ACTIVITY", new NormalizedFieldAndValue("ACTIVITY", "fetch", "THING", "0")); + + ProtobufEdgeDataTypeHandler edgeHandler = new ProtobufEdgeDataTypeHandler<>(); + TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID()); + edgeHandler.setup(context); + + Set expectedKeys = new HashSet<>(); + expectedKeys.add("guppy"); + expectedKeys.add("guppy%00;siamese"); + expectedKeys.add("salmon"); + expectedKeys.add("salmon%00;tabby"); + expectedKeys.add("siamese"); + expectedKeys.add("siamese%00;guppy"); + expectedKeys.add("tabby"); + expectedKeys.add("tabby%00;salmon"); + + RawRecordContainer myEvent = getEvent(conf); + myEvent.setDate(1666737913000L); + + // the count is doubled since activity < event date in this test. In this case, we add 2 edges each. + EdgeHandlerTestUtil.processEvent(fields, edgeHandler, myEvent, 16, true, false); + Assert.assertEquals(expectedKeys, EdgeHandlerTestUtil.edgeKeyResults.keySet()); + + Assert.assertEquals("MY_EDGE_TYPE/TO-FROM", EdgeHandlerTestUtil.edgeKeyResults.get("guppy%00;siamese").get(0)[0]); + + // the dates + Assert.assertEquals("20221025/MY_CSV_DATA-MY_CSV_DATA///A", EdgeHandlerTestUtil.edgeKeyResults.get("guppy%00;siamese").get(0)[1]); + Assert.assertEquals("20221026/MY_CSV_DATA-MY_CSV_DATA///C", EdgeHandlerTestUtil.edgeKeyResults.get("guppy%00;siamese").get(1)[1]); + + // values + Assert.assertEquals(2, EdgeHandlerTestUtil.edgeValueResults.get("guppy%00;siamese").size()); + Assert.assertEquals( + "count: 1, bitmask: 4194304, sourceValue: guppy, sinkValue: siamese, hours: , duration: , loadDate: " + loadDateStr + + ", uuidString: , uuidObj: 0016dd72-0000-827d-dd4d-001b2163ba09, badActivityDate: false", + EdgeHandlerTestUtil.edgeValueResults.get("guppy%00;siamese").get(0)); + Assert.assertEquals( + "count: 1, bitmask: 2, sourceValue: guppy, sinkValue: siamese, hours: , duration: , loadDate: " + loadDateStr + + ", uuidString: , uuidObj: 0016dd72-0000-827d-dd4d-001b2163ba09, badActivityDate: ", + EdgeHandlerTestUtil.edgeValueResults.get("guppy%00;siamese").get(1)); + Assert.assertEquals(2, EdgeHandlerTestUtil.edgeValueResults.get("guppy").size()); + Assert.assertEquals( + "count: , bitmask: , sourceValue: guppy, sinkValue: , hours: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0], duration: , loadDate: " + + loadDateStr + ", uuidString: , uuidObj: 0016dd72-0000-827d-dd4d-001b2163ba09, badActivityDate: false", + EdgeHandlerTestUtil.edgeValueResults.get("guppy").get(0)); + Assert.assertEquals( + "count: , bitmask: , sourceValue: guppy, sinkValue: , hours: [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], duration: , loadDate: " + + loadDateStr + ", uuidString: , uuidObj: 0016dd72-0000-827d-dd4d-001b2163ba09, badActivityDate: ", + EdgeHandlerTestUtil.edgeValueResults.get("guppy").get(1)); + + Assert.assertEquals("PRIVATE", EdgeHandlerTestUtil.edgeKeyResults.get("guppy%00;siamese").get(0)[2]); 
+ Assert.assertEquals("1666737913000", EdgeHandlerTestUtil.edgeKeyResults.get("guppy%00;siamese").get(0)[3]); + + } + + @Test + public void testUnawarePreconSameGroupVeryOldData() { + // FELINE == 'tabby' + + fields.put("EVENT_DATE", new BaseNormalizedContent("EVENT_DATE", "1966-09-08")); + fields.put("UUID", new BaseNormalizedContent("UUID", "0016dd72-0000-827d-dd4d-001b2163ba09")); + fields.put("FELINE", new NormalizedFieldAndValue("FELINE", "tabby", "PET", "0")); + fields.put("FELINE", new NormalizedFieldAndValue("FELINE", "siamese", "PET", "1")); + fields.put("FISH", new NormalizedFieldAndValue("FISH", "salmon", "PET", "0")); + fields.put("FISH", new NormalizedFieldAndValue("FISH", "guppy", "PET", "1")); + fields.put("ACTIVITY", new NormalizedFieldAndValue("ACTIVITY", "fetch", "THING", "0")); + + ProtobufEdgeDataTypeHandler edgeHandler = new ProtobufEdgeDataTypeHandler<>(); + TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID()); + edgeHandler.setup(context); + + Set expectedKeys = new HashSet<>(); + expectedKeys.add("guppy"); + expectedKeys.add("guppy%00;siamese"); + expectedKeys.add("salmon"); + expectedKeys.add("salmon%00;tabby"); + expectedKeys.add("siamese"); + expectedKeys.add("siamese%00;guppy"); + expectedKeys.add("tabby"); + expectedKeys.add("tabby%00;salmon"); + + RawRecordContainer myEvent = getEvent(conf); + myEvent.setDate(0L); + + // the count is doubled since activity < event date in this test. In this case, we add 2 edges each. + EdgeHandlerTestUtil.processEvent(fields, edgeHandler, myEvent, 16, true, false); + Assert.assertEquals(expectedKeys, EdgeHandlerTestUtil.edgeKeyResults.keySet()); + + Assert.assertEquals("MY_EDGE_TYPE/TO-FROM", EdgeHandlerTestUtil.edgeKeyResults.get("guppy%00;siamese").get(0)[0]); + + // the dates + Assert.assertEquals("19700101/MY_CSV_DATA-MY_CSV_DATA///A", EdgeHandlerTestUtil.edgeKeyResults.get("guppy%00;siamese").get(0)[1]); + Assert.assertEquals("19660908/MY_CSV_DATA-MY_CSV_DATA///C", EdgeHandlerTestUtil.edgeKeyResults.get("guppy%00;siamese").get(1)[1]); + + // values + Assert.assertEquals(2, EdgeHandlerTestUtil.edgeValueResults.get("guppy%00;siamese").size()); + Assert.assertEquals( + "count: 1, bitmask: , sourceValue: guppy, sinkValue: siamese, hours: , duration: , loadDate: " + loadDateStr + + ", uuidString: , uuidObj: 0016dd72-0000-827d-dd4d-001b2163ba09, badActivityDate: false", + EdgeHandlerTestUtil.edgeValueResults.get("guppy%00;siamese").get(0)); + Assert.assertEquals( + "count: 1, bitmask: 1, sourceValue: guppy, sinkValue: siamese, hours: , duration: , loadDate: " + loadDateStr + + ", uuidString: , uuidObj: 0016dd72-0000-827d-dd4d-001b2163ba09, badActivityDate: ", + EdgeHandlerTestUtil.edgeValueResults.get("guppy%00;siamese").get(1)); + Assert.assertEquals(2, EdgeHandlerTestUtil.edgeValueResults.get("guppy").size()); + Assert.assertEquals( + "count: , bitmask: , sourceValue: guppy, sinkValue: , hours: [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], duration: , loadDate: " + + loadDateStr + ", uuidString: , uuidObj: 0016dd72-0000-827d-dd4d-001b2163ba09, badActivityDate: ", + EdgeHandlerTestUtil.edgeValueResults.get("guppy").get(0)); + Assert.assertEquals( + "count: , bitmask: , sourceValue: guppy, sinkValue: , hours: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], duration: , loadDate: " + + loadDateStr + ", uuidString: , uuidObj: 0016dd72-0000-827d-dd4d-001b2163ba09, badActivityDate: false", + EdgeHandlerTestUtil.edgeValueResults.get("guppy").get(1)); 
+ + Assert.assertEquals("PRIVATE", EdgeHandlerTestUtil.edgeKeyResults.get("guppy%00;siamese").get(0)[2]); + Assert.assertEquals("0", EdgeHandlerTestUtil.edgeKeyResults.get("guppy%00;siamese").get(0)[3]); } @@ -147,7 +303,7 @@ public void testUnawarePreconDifferentGroup() { RawRecordContainer myEvent = getEvent(conf); EdgeHandlerTestUtil.processEvent(fields, edgeHandler, myEvent, 12, true, false); - Assert.assertEquals(expectedKeys, EdgeHandlerTestUtil.edgeKeyResults); + Assert.assertEquals(expectedKeys, EdgeHandlerTestUtil.edgeKeyResults.keySet()); } @@ -175,7 +331,7 @@ public void testAwarePreconSameGroup() { RawRecordContainer myEvent = getEvent(conf); EdgeHandlerTestUtil.processEvent(fields, edgeHandler, myEvent, 4, true, false); - Assert.assertEquals(expectedKeys, EdgeHandlerTestUtil.edgeKeyResults); + Assert.assertEquals(expectedKeys, EdgeHandlerTestUtil.edgeKeyResults.keySet()); } @@ -206,7 +362,7 @@ public void testAwarePreconDifferentGroup() { RawRecordContainer myEvent = getEvent(conf); EdgeHandlerTestUtil.processEvent(fields, edgeHandler, myEvent, 7, true, false); - Assert.assertEquals(expectedKeys, EdgeHandlerTestUtil.edgeKeyResults); + Assert.assertEquals(expectedKeys, EdgeHandlerTestUtil.edgeKeyResults.keySet()); } @@ -237,7 +393,7 @@ public void testAwareFieldComparison() { RawRecordContainer myEvent = getEvent(conf); EdgeHandlerTestUtil.processEvent(fields, edgeHandler, myEvent, 4, true, false); - Assert.assertEquals(expectedKeys, EdgeHandlerTestUtil.edgeKeyResults); + Assert.assertEquals(expectedKeys, EdgeHandlerTestUtil.edgeKeyResults.keySet()); } @@ -260,7 +416,7 @@ public void testAwareFieldComparisonNullCheck() { RawRecordContainer myEvent = getEvent(conf); EdgeHandlerTestUtil.processEvent(fields, edgeHandler, myEvent, 0, true, false); - Assert.assertEquals(expectedKeys, EdgeHandlerTestUtil.edgeKeyResults); + Assert.assertEquals(expectedKeys, EdgeHandlerTestUtil.edgeKeyResults.keySet()); } @@ -299,7 +455,7 @@ public void testAwareOrGroupsNotEqual() { RawRecordContainer myEvent = getEvent(conf); EdgeHandlerTestUtil.processEvent(fields, edgeHandler, myEvent, 8, true, false); - Assert.assertEquals(expectedKeys, EdgeHandlerTestUtil.edgeKeyResults); + Assert.assertEquals(expectedKeys, EdgeHandlerTestUtil.edgeKeyResults.keySet()); } @@ -334,7 +490,7 @@ public void testAwareGreaterThanSameGroup() { RawRecordContainer myEvent = getEvent(conf); EdgeHandlerTestUtil.processEvent(fields, edgeHandler, myEvent, 4, true, false); - Assert.assertEquals(expectedKeys, EdgeHandlerTestUtil.edgeKeyResults); + Assert.assertEquals(expectedKeys, EdgeHandlerTestUtil.edgeKeyResults.keySet()); } } From 1ba664f1ac47a0ce3991899947213dc91812504a Mon Sep 17 00:00:00 2001 From: Drew Farris Date: Wed, 1 Nov 2023 16:24:51 -0400 Subject: [PATCH 03/32] Implements improved weighted SSDeep match scoring (#2129) * SSDeepSimilarityQuery logic match scoring improvements * implements improved weighted SSDeep match scoring * implements optional min score threshold parameter * removed MATCH_RANK because it does not reflect sorted scoring * Updated unit test assertions for SSDeepSimilarityQueryTransformerTest * Whitespace formatting for SSDeep-related code * Updates per code review * Further updates per code review * Further updates per code review * Further updates per code review --------- Co-authored-by: hgklohr --- .../SSDeepSimilarityQueryTransformer.java | 78 +++++++-- .../query/util/ssdeep/NGramScoreTuple.java | 47 ++++-- .../query/util/ssdeep/SSDeepHash.java | 13 +- 
.../query/util/ssdeep/SSDeepHashScorer.java | 119 ++++++++++++++ .../java/datawave/query/SSDeepQueryTest.java | 151 ++++++++++++++---- .../SSDeepSimilarityQueryTransformerTest.java | 2 + .../util/ssdeep/SSDeepHashScorerTest.java | 40 +++++ 7 files changed, 382 insertions(+), 68 deletions(-) create mode 100644 warehouse/query-core/src/main/java/datawave/query/util/ssdeep/SSDeepHashScorer.java create mode 100644 warehouse/query-core/src/test/java/datawave/query/util/ssdeep/SSDeepHashScorerTest.java diff --git a/warehouse/query-core/src/main/java/datawave/query/transformer/SSDeepSimilarityQueryTransformer.java b/warehouse/query-core/src/main/java/datawave/query/transformer/SSDeepSimilarityQueryTransformer.java index 74bd1806d9..92deebc7ad 100644 --- a/warehouse/query-core/src/main/java/datawave/query/transformer/SSDeepSimilarityQueryTransformer.java +++ b/warehouse/query-core/src/main/java/datawave/query/transformer/SSDeepSimilarityQueryTransformer.java @@ -22,7 +22,9 @@ import datawave.query.util.ssdeep.NGramScoreTuple; import datawave.query.util.ssdeep.NGramTuple; import datawave.query.util.ssdeep.SSDeepHash; +import datawave.query.util.ssdeep.SSDeepHashScorer; import datawave.webservice.query.Query; +import datawave.webservice.query.QueryImpl; import datawave.webservice.query.exception.EmptyObjectException; import datawave.webservice.query.logic.BaseQueryLogicTransformer; import datawave.webservice.query.result.event.EventBase; @@ -33,6 +35,8 @@ public class SSDeepSimilarityQueryTransformer extends BaseQueryLogicTransformer,Map.Entry> { + public static final String MIN_SSDEEP_SCORE_PARAMETER = "minScore"; + private static final Logger log = Logger.getLogger(SSDeepSimilarityQueryTransformer.class); protected final Authorizations auths; @@ -58,11 +62,17 @@ public class SSDeepSimilarityQueryTransformer extends BaseQueryLogicTransformer< /** Tracks which ssdeep hashes each of the ngrams originated from */ final Multimap queryMap; + /** The maximum number of repeated characters allowed in a ssdeep hash - used to perform normalization for scoring */ + final int maxRepeatedCharacters; + + final int minScoreThreshold; + public SSDeepSimilarityQueryTransformer(Query query, SSDeepSimilarityQueryConfiguration config, MarkingFunctions markingFunctions, ResponseObjectFactory responseObjectFactory) { super(markingFunctions); this.auths = new Authorizations(query.getQueryAuthorizations().split(",")); this.queryMap = config.getQueryMap(); + this.maxRepeatedCharacters = config.getMaxRepeatedCharacters(); this.responseObjectFactory = responseObjectFactory; this.bucketEncoder = new IntegerEncoding(config.getBucketEncodingBase(), config.getBucketEncodingLength()); @@ -70,6 +80,28 @@ public SSDeepSimilarityQueryTransformer(Query query, SSDeepSimilarityQueryConfig this.chunkStart = bucketEncoder.getLength(); this.chunkEnd = chunkStart + chunkSizeEncoding.getLength(); + + this.minScoreThreshold = readOptionalMinScoreThreshold(query); + } + + private int readOptionalMinScoreThreshold(Query query) { + QueryImpl.Parameter minScoreParameter = query.findParameter(MIN_SSDEEP_SCORE_PARAMETER); + if (minScoreParameter != null) { + String minScoreString = minScoreParameter.getParameterValue(); + try { + int minScore = Integer.parseInt(minScoreString); + if (minScore < 0 || minScore > 100) { + log.warn("Ssdeep score threshold must be between 0-100, but was " + minScoreString + ", ignoring " + MIN_SSDEEP_SCORE_PARAMETER + + " parameter."); + } else { + return minScore; + } + } catch (NumberFormatException e) { + 
log.warn("Number format exception encountered when parsing score threshold of '" + minScoreString + "' ignoring " + MIN_SSDEEP_SCORE_PARAMETER + + " parameter."); + } + } + return 0; } @Override @@ -112,12 +144,12 @@ public BaseQueryResponse generateResponseFromScores(Multimap transform(Map.Entry input) th * @return a map of ssdeep hashes to score tuples. */ protected Multimap scoreQuery(Multimap queryMap, Multimap chunkPostings) { - // score based on chunk match count + // The base match score based on the number of matching ngrams shared between the query hash and the matched hash + // This map tracks that: the query hash is the key, matching hash and score is the value. final Map> scoredHashMatches = new TreeMap<>(); // align the chunk postings to their original query ssdeep hash and count the number of matches - // for each chunk that corresponds to that original ssdeep hash - chunkPostings.asMap().forEach((hash, cpc) -> { - log.trace("Posting " + hash + " had " + cpc.size() + "chunk tuples"); - cpc.forEach(ct -> { - Collection ssdhc = queryMap.get(ct); - log.trace("Chunk tuple " + ct + " had " + ssdhc.size() + "related query hashes"); - ssdhc.forEach(ssdh -> { - final Map chunkMatchCount = scoredHashMatches.computeIfAbsent(ssdh, s -> new TreeMap<>()); - final Integer score = chunkMatchCount.computeIfAbsent(hash, m -> 0); - log.trace("Incrementing score for " + ssdh + "," + hash + " by " + cpc.size()); - chunkMatchCount.put(hash, score + 1); + // for each chunk that corresponds to that original ssdeep hash. The number of ngrams that the query and + // target have in common thus become the base score. + chunkPostings.asMap().forEach((matchingHash, matchingNgrams) -> { + log.trace("Posting " + matchingHash + " had " + matchingNgrams.size() + " matching ngrams"); + matchingNgrams.forEach(matchingNgram -> { // for each matching hash ngram + Collection queryHashes = queryMap.get(matchingNgram); // find the queries that included that ngram + log.trace("Ngram " + matchingNgram + " had " + queryHashes.size() + " related query hashes"); + queryHashes.forEach(queryHash -> { // increment the score for each query the ngram appeared in. + final Map chunkMatchCount = scoredHashMatches.computeIfAbsent(queryHash, s -> new TreeMap<>()); + final Integer score = chunkMatchCount.computeIfAbsent(matchingHash, m -> 0); + log.trace("Incrementing score for " + queryHash + "," + matchingHash + " by 1"); + chunkMatchCount.put(matchingHash, score + 1); }); }); }); - // convert the counted chunks into tuples. + final SSDeepHashScorer scorer = new SSDeepHashScorer(maxRepeatedCharacters); + + // convert the counted chunks into score tuples. final Multimap scoreTuples = TreeMultimap.create(); - scoredHashMatches.forEach((sdh, cmc) -> cmc.forEach((k, v) -> scoreTuples.put(sdh, new NGramScoreTuple(k, v)))); + scoredHashMatches.forEach((queryHash, scoredMatches) -> { + scoredMatches.forEach((matchingHash, baseScore) -> { + int weightedScore = scorer.apply(queryHash, matchingHash); + // keep the scored tuple if either the minScoreThreshold is not set or the weightedScore exceeds the set threshold. 
+ if (minScoreThreshold <= 0 || weightedScore > minScoreThreshold) { + scoreTuples.put(queryHash, new NGramScoreTuple(matchingHash, baseScore, weightedScore)); + } + }); + }); return scoreTuples; } diff --git a/warehouse/query-core/src/main/java/datawave/query/util/ssdeep/NGramScoreTuple.java b/warehouse/query-core/src/main/java/datawave/query/util/ssdeep/NGramScoreTuple.java index ebb5905ede..d64fb0d3df 100644 --- a/warehouse/query-core/src/main/java/datawave/query/util/ssdeep/NGramScoreTuple.java +++ b/warehouse/query-core/src/main/java/datawave/query/util/ssdeep/NGramScoreTuple.java @@ -1,48 +1,65 @@ package datawave.query.util.ssdeep; import java.io.Serializable; -import java.util.Objects; public class NGramScoreTuple implements Serializable, Comparable { final SSDeepHash ssDeepHash; - final float score; + final float baseScore; + final int weightedScore; - public NGramScoreTuple(SSDeepHash ssDeepHash, float score) { + public NGramScoreTuple(SSDeepHash ssDeepHash, float baseScore, int weightedScore) { this.ssDeepHash = ssDeepHash; - this.score = score; + this.baseScore = baseScore; + this.weightedScore = weightedScore; } public SSDeepHash getSsDeepHash() { return ssDeepHash; } - public float getScore() { - return score; + public float getBaseScore() { + return baseScore; + } + + public int getWeightedScore() { + return weightedScore; + } + + @Override + public String toString() { + return "ScoreTuple{" + "hash=" + ssDeepHash + ", baseScore=" + baseScore + ", weightedScore=" + weightedScore + '}'; } @Override public boolean equals(Object o) { if (this == o) return true; - if (!(o instanceof NGramScoreTuple)) + if (o == null || getClass() != o.getClass()) return false; + NGramScoreTuple that = (NGramScoreTuple) o; - return ssDeepHash == that.ssDeepHash && Float.compare(that.score, score) == 0; - } - @Override - public int hashCode() { - return Objects.hash(ssDeepHash, score); + if (baseScore == that.baseScore) + return false; + if (weightedScore == that.weightedScore) + return false; + return ssDeepHash.equals(that.ssDeepHash); } @Override - public String toString() { - return "ScoreTuple{" + "hash=" + ssDeepHash + ", score=" + score + '}'; + public int hashCode() { + int result = ssDeepHash.hashCode(); + result = 31 * result + (baseScore != 0.0f ? Float.floatToIntBits(baseScore) : 0); + result = 31 * result + weightedScore; + return result; } @Override public int compareTo(NGramScoreTuple o) { - int cmp = Float.compare(o.score, score); + int cmp = Integer.compare(o.weightedScore, weightedScore); + if (cmp == 0) { + cmp = Float.compare(o.baseScore, baseScore); + } if (cmp == 0) { cmp = ssDeepHash.compareTo(o.ssDeepHash); } diff --git a/warehouse/query-core/src/main/java/datawave/query/util/ssdeep/SSDeepHash.java b/warehouse/query-core/src/main/java/datawave/query/util/ssdeep/SSDeepHash.java index 0d398357e1..c1c5392e89 100644 --- a/warehouse/query-core/src/main/java/datawave/query/util/ssdeep/SSDeepHash.java +++ b/warehouse/query-core/src/main/java/datawave/query/util/ssdeep/SSDeepHash.java @@ -163,7 +163,8 @@ public static SSDeepHash normalize(final SSDeepHash input) { public static SSDeepHash normalize(final SSDeepHash input, int maxRepeatedCharacters) { final String n1 = normalizeSSDeepChunk(input.getChunk(), maxRepeatedCharacters); final String n2 = normalizeSSDeepChunk(input.getDoubleChunk(), maxRepeatedCharacters); - if (n1 == null && n2 == null) { + // we really do want '==' here, not equals. neither chunk is changed, so just return the input. 
+ if (n1 == input.getChunk() && (n2 == input.getDoubleChunk())) { return input; } return new SSDeepHash(input.getChunkSize(), n1 == null ? input.getChunk() : n1, n2 == null ? input.getDoubleChunk() : n2); @@ -175,18 +176,18 @@ public SSDeepHash normalize(int maxRepeatedCharacters) { /** * Given a string that potentially contains long runs of repeating characters, replace such runs with at most maxRepeated characters. If the string is not - * modified, return null. + * modified, return the input string. * * @param input * the string to analyze and possibly modify. * @param maxRepeatedCharacters * the number of maxRepeatedCharacters to allow. Any String that has a run of more than this many of the same character will have that run * collapsed to be this many characters in length. Zero indicates that no normalization should be performed. - * @return the modified string or null if the string is not modified. + * @return the modified string or the original string if the string is not modified. */ public static String normalizeSSDeepChunk(final String input, final int maxRepeatedCharacters) { if (maxRepeatedCharacters <= 0) { - return null; // do nothing. + return input; // do nothing. } final char[] data = input.toCharArray(); final int length = data.length; @@ -215,11 +216,11 @@ public static String normalizeSSDeepChunk(final String input, final int maxRepea } } - // if we have modified the data, create and return a string otherwise, null + // if we have modified the data, create and return a string otherwise, return the input unchanged if (destIndex < length) { return new String(data, 0, destIndex); } else { - return null; + return input; } } diff --git a/warehouse/query-core/src/main/java/datawave/query/util/ssdeep/SSDeepHashScorer.java b/warehouse/query-core/src/main/java/datawave/query/util/ssdeep/SSDeepHashScorer.java new file mode 100644 index 0000000000..fce7cb5d66 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/util/ssdeep/SSDeepHashScorer.java @@ -0,0 +1,119 @@ +package datawave.query.util.ssdeep; + +import org.apache.commons.text.similarity.LevenshteinDistance; +import org.apache.log4j.Logger; + +/** Implements functions to calculate a similarity score for a pair of SSDeepHashes */ +public class SSDeepHashScorer { + private static final Logger log = Logger.getLogger(SSDeepHash.class); + + private final int maxRepeatedCharacters; + + public SSDeepHashScorer() { + this(SSDeepHash.DEFAULT_MAX_REPEATED_CHARACTERS); + } + + public SSDeepHashScorer(int maxRepeatedCharacters) { + this.maxRepeatedCharacters = maxRepeatedCharacters; + } + + /** + * Compare two ssdeep hashes, returning a score between 0 to 100 that indicates similarity. A score of 0 means that the items are not similar at all whereas + * a score of 100 indicates a high degree of similarity. + * + * @param signature1 + * the first object to be compared. + * @param signature2 + * the second object to be compared. + * @return an integer between 0 and 100 + */ + public int apply(SSDeepHash signature1, SSDeepHash signature2) { + if ((null == signature1) || (null == signature2)) { + return -1; + } + final long chunkSize1 = signature1.getChunkSize(); + final long chunkSize2 = signature2.getChunkSize(); + + // We require the chunk size to either be equal, or for one to be twice the other. If the chunk sizes don't + // match then we are comparing apples to oranges. This isn't an 'error' per se. We could have two valid + // ssdeep hashes, but with chunk sizes so different they can't be compared. 
+ if ((chunkSize1 != chunkSize2) && (chunkSize1 != (chunkSize2 * 2)) && (chunkSize2 != (chunkSize1 * 2))) { + if (log.isDebugEnabled()) { + log.debug("block sizes too different: " + chunkSize1 + " " + chunkSize2); + } + return 0; + } + + // There is very little information content in sequences of the same character like 'LLLLL'. Eliminate any + // sequences longer than MAX_REPEATED_CHARACTERS (3). + final String s1chunk = SSDeepHash.normalizeSSDeepChunk(signature1.getChunk(), maxRepeatedCharacters); + final String s1doubleChunk = SSDeepHash.normalizeSSDeepChunk(signature1.getDoubleChunk(), maxRepeatedCharacters); + final String s2chunk = SSDeepHash.normalizeSSDeepChunk(signature2.getChunk(), maxRepeatedCharacters); + final String s2doubleChunk = SSDeepHash.normalizeSSDeepChunk(signature2.getDoubleChunk(), maxRepeatedCharacters); + + // Each ssdeep has two chunks with different chunk sizes. Choose which ones to use from each hash for scoring. + final long score; + if (chunkSize1 == chunkSize2) { + // The ssdeep chunk sizes are equal. + final long score1 = scoreChunks(s1chunk, s2chunk, chunkSize1); + final long score2 = scoreChunks(s1doubleChunk, s2doubleChunk, chunkSize2); + score = Math.max(score1, score2); + } else if (chunkSize1 == (chunkSize2 * 2)) { + // The first ssdeep has twice the chunk size of the second. + score = scoreChunks(s1chunk, s2doubleChunk, chunkSize1); + } else { + // The second ssdeep has twice the chunk size of the first. + score = scoreChunks(s1doubleChunk, s2chunk, chunkSize2); + } + + return (int) score; + } + + /** + * This is the low level chunk scoring algorithm. It takes two chunks and scores them on a scale of 0-100 where 0 is a terrible match and 100 is a great + * match. The chunkSize is used to cope with very small messages. + */ + private static int scoreChunks(final String s1, final String s2, final long chunkSize) { + final int len1 = s1.length(); + final int len2 = s2.length(); + + if ((len1 > SSDeepHash.CHUNK_LENGTH) || (len2 > SSDeepHash.CHUNK_LENGTH)) { + // one of the chunk lengths exceeds the max chunk length, perhaps it is not a real ssdeep? + return 0; + } + + // Compute the edit distance between the two chunk strings. The edit distance gives us a pretty good idea of + // how closely related the two chunks are. + int editDistance = LevenshteinDistance.getDefaultInstance().apply(s1, s2); + if (log.isDebugEnabled()) { + log.debug("edit_dist: " + editDistance); + } + + // Scale the edit distance by the lengths of the two chunks. This changes the baseScore to be a measure of the + // proportion of the message that has changed rather than an absolute quantity. It also copes with the + // variability of the chunk string lengths. + int score = (editDistance * SSDeepHash.CHUNK_LENGTH) / (len1 + len2); + + // At this stage the baseScore occurs roughly on a 0-64 scale, + // with 0 being a good match and 64 being a complete mismatch. + + // Rescale to a 0-100 scale (friendlier to humans). + score = (100 * score) / SSDeepHash.CHUNK_LENGTH; + + // It is possible to get a baseScore above 100 here, but it is a really terrible match. + if (score >= 100) { + return 0; + } + + // Invert the score with 0 being a poor match and 100 being a excellent match. + score = 100 - score; + + // When the chunk size is small we don't want to exaggerate the match. 
+ final int threshold = (int) (chunkSize / SSDeepHash.MIN_CHUNK_SIZE * Math.min(len1, len2)); + if (score > threshold) { + score = threshold; + } + + return score; + } +} diff --git a/warehouse/query-core/src/test/java/datawave/query/SSDeepQueryTest.java b/warehouse/query-core/src/test/java/datawave/query/SSDeepQueryTest.java index bb932abe2f..4b2d169de4 100644 --- a/warehouse/query-core/src/test/java/datawave/query/SSDeepQueryTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/SSDeepQueryTest.java @@ -2,6 +2,7 @@ import static org.junit.Assert.fail; +import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.Iterator; @@ -57,6 +58,8 @@ public class SSDeepQueryTest { public static String[] TEST_SSDEEPS = {"12288:002r/VG4GjeZHkwuPikQ7lKH5p5H9x1beZHkwulizQ1lK55pGxlXTd8zbW:002LVG4GjeZEXi37l6Br1beZEdic1lmu", "6144:02C3nq73v1kHGhs6y7ppFj93NRW6/ftZTgC6e8o4toHZmk6ZxoXb0ns:02C4cGCLjj9Swfj9koHEk6/Fns", "3072:02irbxzGAFYDMxud7fKg3dXVmbOn5u46Kjnz/G8VYrs123D6pIJLIOSP:02MKlWQ7Sg3d4bO968rm7JO", + "3072:03jscyaGAFYDMxud7fKg3dXVmbOn5u46Kjnz/G8VYrs123D6pIJLIOSP:03NLmXR7Sg3d4bO968rm7JO", + "3072:03jscyaZZZZZYYYYXXXWWdXVmbOn5u46KjnzWWWXXXXYYYYYYZZZZZZZ:03NLmXR7ZZZYYXW9WXYYZZZ", "48:1aBhsiUw69/UXX0x0qzNkVkydf2klA8a7Z35:155w69MXAlNkmkWTF5", "196608:wEEE+EEEEE0LEEEEEEEEEEREEEEhEEETEEEEEWUEEEJEEEEcEEEEEEEE3EEEEEEN:", "1536:0YgNvw/OmgPgiQeI+25Nh6+RS5Qa8LmbyfAiIRgizy1cBx76UKYbD+iD/RYgNvw6:", "12288:222222222222222222222222222222222:"}; @@ -135,17 +138,6 @@ public static void loadData() throws Exception { logSSDeepTestData(tableName); } - private static void logSSDeepTestData(String tableName) throws TableNotFoundException { - Scanner scanner = accumuloClient.createScanner(tableName, auths); - Iterator> iterator = scanner.iterator(); - log.debug("*************** " + tableName + " ********************"); - while (iterator.hasNext()) { - Map.Entry entry = iterator.next(); - log.debug(entry); - } - scanner.close(); - } - @Before public void setUpQuery() { logic = new SSDeepSimilarityQueryLogic(); @@ -162,38 +154,65 @@ public void setUpQuery() { } @Test - public void testSingleQuery() throws Exception { + /** Test that a single query ssdeep with no match score threshold returns the expected results */ + public void testSingleQueryNoMinScore() throws Exception { + runSingleQuery(false); + } + + @Test + /** Test that a single query ssdeep with a min score threshold returns the expected results */ + public void testSingleQueryMinScore() throws Exception { + runSingleQuery(true); + } + + private static void logSSDeepTestData(String tableName) throws TableNotFoundException { + Scanner scanner = accumuloClient.createScanner(tableName, auths); + Iterator> iterator = scanner.iterator(); + log.debug("*************** " + tableName + " ********************"); + while (iterator.hasNext()) { + Map.Entry entry = iterator.next(); + log.debug(entry); + } + scanner.close(); + } + + public void runSingleQuery(boolean applyMinScoreThreshold) throws Exception { String query = "CHECKSUM_SSDEEP:" + TEST_SSDEEPS[2]; - EventQueryResponseBase response = runSSDeepQuery(query); + + final int minScoreThreshold = applyMinScoreThreshold ? 65 : 0; + final int expectedEventCount = applyMinScoreThreshold ? 
2 : 3; + + EventQueryResponseBase response = runSSDeepQuery(query, minScoreThreshold); List events = response.getEvents(); int eventCount = events.size(); + Map> observedEvents = extractObservedEvents(events); - Map observedFields = new HashMap<>(); - if (eventCount > 0) { - for (EventBase e : events) { - List fields = e.getFields(); - for (FieldBase f : fields) { - observedFields.put(f.getName(), f.getValueString()); - } - } - } + Assert.assertEquals(expectedEventCount, eventCount); - Assert.assertFalse("Observed fields was unexpectedly empty", observedFields.isEmpty()); - Assert.assertEquals("65.0", observedFields.remove("MATCH_SCORE")); - Assert.assertEquals("1", observedFields.remove("MATCH_RANK")); - Assert.assertEquals("3072:02irbxzGAFYDMxud7fKg3dXVmbOn5u46Kjnz/G8VYrs123D6pIJLIOSP:02MKlWQ7Sg3d4bO968rm7JO", observedFields.remove("QUERY_SSDEEP")); - Assert.assertEquals("3072:02irbxzGAFYDMxud7fKg3dXVmbOn5u46Kjnz/G8VYrs123D6pIJLIOSP:02MKlWQ7Sg3d4bO968rm7JO", observedFields.remove("MATCHING_SSDEEP")); - Assert.assertTrue("Observed unexpected field(s): " + observedFields, observedFields.isEmpty()); - Assert.assertEquals(1, eventCount); + // find the fields for the self match example. + assertMatch(TEST_SSDEEPS[2], TEST_SSDEEPS[2], "65.0", "1", "100", observedEvents); + + // find and validate the fields for the partial match example. + assertMatch(TEST_SSDEEPS[2], TEST_SSDEEPS[3], "51.0", "2", "96", observedEvents); + + if (applyMinScoreThreshold) + assertNoMatch(TEST_SSDEEPS[2], TEST_SSDEEPS[3], observedEvents); + else + assertMatch(TEST_SSDEEPS[2], TEST_SSDEEPS[4], "9.0", "3", "63", observedEvents); } - public EventQueryResponseBase runSSDeepQuery(String query) throws Exception { + public EventQueryResponseBase runSSDeepQuery(String query, int minScoreThreshold) throws Exception { + QueryImpl q = new QueryImpl(); q.setQuery(query); q.setId(UUID.randomUUID()); q.setPagesize(Integer.MAX_VALUE); q.setQueryAuthorizations(auths.toString()); + if (minScoreThreshold > 0) { + q.addParameter(SSDeepSimilarityQueryTransformer.MIN_SSDEEP_SCORE_PARAMETER, String.valueOf(minScoreThreshold)); + } + RunningQuery runner = new RunningQuery(accumuloClient, AccumuloConnectionFactory.Priority.NORMAL, this.logic, q, "", principal, new QueryMetricFactoryImpl()); TransformIterator transformIterator = runner.getTransformIterator(); @@ -202,4 +221,76 @@ public EventQueryResponseBase runSSDeepQuery(String query) throws Exception { return response; } + + /** Extract the events from a set of results into an easy to manage data structure for validation */ + public Map> extractObservedEvents(List events) { + int eventCount = events.size(); + Map> observedEvents = new HashMap<>(); + if (eventCount > 0) { + for (EventBase e : events) { + Map observedFields = new HashMap<>(); + String querySsdeep = "UNKNOWN_QUERY"; + String matchingSsdeep = "UNKNOWN_MATCH"; + + List fields = e.getFields(); + for (FieldBase f : fields) { + if (f.getName().equals("QUERY_SSDEEP")) { + querySsdeep = f.getValueString(); + } + if (f.getName().equals("MATCHING_SSDEEP")) { + matchingSsdeep = f.getValueString(); + } + observedFields.put(f.getName(), f.getValueString()); + } + + String eventKey = querySsdeep + "#" + matchingSsdeep; + observedEvents.put(eventKey, observedFields); + } + } + return observedEvents; + } + + /** + * assert that a match exists between the specified query and matching ssdeep and that the match has the expected properties + * + * @param querySsdeep + * the query ssdeep we expect to find in the match results + * @param 
matchingSsdeep
+     *            the matching ssdeep we expect to find in the match results.
+     * @param matchScore
+     *            the base match score
+     * @param matchRank
+     *            the match rank
+     * @param weightedScore
+     *            the weighted match score.
+     * @param observedEvents
+     *            the map of observed events, created by extractObservedEvents on the event list obtained from query execution.
+     */
+    public static void assertMatch(String querySsdeep, String matchingSsdeep, String matchScore, String matchRank, String weightedScore,
+                    Map> observedEvents) {
+        final Map observedFields = observedEvents.get(querySsdeep + "#" + matchingSsdeep);
+        Assert.assertNotNull("Observed fields was null", observedFields);
+        Assert.assertFalse("Observed fields was unexpectedly empty", observedFields.isEmpty());
+        Assert.assertEquals(matchScore, observedFields.remove("MATCH_SCORE"));
+        Assert.assertEquals(weightedScore, observedFields.remove("WEIGHTED_SCORE"));
+        Assert.assertEquals(querySsdeep, observedFields.remove("QUERY_SSDEEP"));
+        Assert.assertEquals(matchingSsdeep, observedFields.remove("MATCHING_SSDEEP"));
+        Assert.assertTrue("Observed unexpected field(s) in full match: " + observedFields, observedFields.isEmpty());
+    }
+
+    /**
+     * Assert that the results do not contain a match between the specified query and matching ssdeep
+     *
+     * @param querySsdeep
+     *            the query ssdeep we do not expect to find in the match results
+     * @param matchingSsdeep
+     *            the matching ssdeep we do not expect to find in the match results
+     * @param observedEvents
+     *            the map of the observed events, created by extractObservedEvents on the event list obtained from query execution.
+     */
+    public static void assertNoMatch(String querySsdeep, String matchingSsdeep, Map> observedEvents) {
+        final Map observedFields = observedEvents.get(querySsdeep + "#" + matchingSsdeep);
+        Assert.assertTrue("Observed fields was not empty", observedFields.isEmpty());
+
+    }
 }
diff --git a/warehouse/query-core/src/test/java/datawave/query/transformer/SSDeepSimilarityQueryTransformerTest.java b/warehouse/query-core/src/test/java/datawave/query/transformer/SSDeepSimilarityQueryTransformerTest.java
index 4cf1042f58..4d2399b7e4 100644
--- a/warehouse/query-core/src/test/java/datawave/query/transformer/SSDeepSimilarityQueryTransformerTest.java
+++ b/warehouse/query-core/src/test/java/datawave/query/transformer/SSDeepSimilarityQueryTransformerTest.java
@@ -6,6 +6,7 @@
 import java.util.List;
 import java.util.Map;
 
+import datawave.webservice.query.QueryImpl;
 import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.data.Value;
 import org.easymock.EasyMock;
@@ -52,6 +53,7 @@ public class SSDeepSimilarityQueryTransformerTest {
 
     public void basicExpects(Key k) {
         EasyMock.expect(mockQuery.getQueryAuthorizations()).andReturn("A,B,C");
+        EasyMock.expect(mockQuery.findParameter("minScore")).andReturn(new QueryImpl.Parameter("minScore",""));
         EasyMock.expect(mockResponseFactory.getEventQueryResponse()).andReturn(new DefaultEventQueryResponse());
         EasyMock.expect(mockResponseFactory.getEvent()).andReturn(new DefaultEvent()).times(1);
         EasyMock.expect(mockResponseFactory.getField()).andReturn(new DefaultField()).times(4);
diff --git a/warehouse/query-core/src/test/java/datawave/query/util/ssdeep/SSDeepHashScorerTest.java b/warehouse/query-core/src/test/java/datawave/query/util/ssdeep/SSDeepHashScorerTest.java
new file mode 100644
index 0000000000..ef9e74f4df
--- /dev/null
+++ b/warehouse/query-core/src/test/java/datawave/query/util/ssdeep/SSDeepHashScorerTest.java
@@ -0,0 +1,40 @@
+package 
datawave.query.util.ssdeep; + +import org.junit.Assert; +import org.junit.Test; + +public class SSDeepHashScorerTest { + + public static final String[][] testData = { + {"3072:02irbxzGAFYDMxud7fKg3dXVmbOn5u46Kjnz/G8VYrs123D6pIJLIOSP:02MKlWQ7Sg3d4bO968rm7JO", + "3072:02irbxzGAFYDMxud7fKg3dXVmbOn5u46Kjnz/G8VYrs123D6pIJLIOSP:02MKlWQ7Sg3d4bO968rm7JO"}, + // repeated character case + {"3072:02irbxzGAFYDMxud7fKg3dXVmbOn5u46Kjnz/G8VYEEEEEEEEEEEEEEE:02MKlWQ7Sg3d4bEEEEEEEE", + "3072:02irbxzGAFYDMxud7fKg3dXVmbOn5u46Kjnz/G8VYrs123D6EEEEEEEE:02MKlWQ7Sg3d4bEEEE"}, + // chunk difference is less than 2 scales, so we can compare these. + {"3072:02irbxzGAFYDMxud7fKg3dXVmbOn5u46Kjnz/G8VYrs123D6pIJLIOSP:02MKlWQ7Sg3d4bO968rm7JO", + "6144:02MKlWQ7Sg3d4bO968rm7JORW6/ftZTgC6e8o4toHZmk6ZxoXb0ns:02C4cGCLjj9Swfj9koHEk6/Fns"}, + // inverse of the last example tests symmetry + {"6144:02MKlWQ7Sg3d4bO968rm7JORW6/ftZTgC6e8o4toHZmk6ZxoXb0ns:02C4cGCLjj9Swfj9koHEk6/Fns", + "3072:02irbxzGAFYDMxud7fKg3dXVmbOn5u46Kjnz/G8VYrs123D6pIJLIOSP:02MKlWQ7Sg3d4bO968rm7JO"}, + // chunk mismatch case + {"3072:02irbxzGAFYDMxud7fKg3dXVmbOn5u46Kjnz/G8VYrs123D6pIJLIOSP:02MKlWQ7Sg3d4bO968rm7JO", + "48:1aBhsiUw69/UXX0x0qzNkVkydf2klA8a7Z35:155w69MXAlNkmkWTF5"}, + // short hash case + {"3:aabbcc:abc", "3:aabbccdd:abcd"}, {"6:aabbcc:abc", "6:aabbccdd:abcd"} + + }; + + public static final int[] expectedScores = {100, 100, 61, 61, 0, 6, 12}; + + @Test + public void testCompare() { + SSDeepHashScorer scorer = new SSDeepHashScorer(); + for (int i = 0; i < testData.length; i++) { + SSDeepHash queryHash = SSDeepHash.parse(testData[i][0]); + SSDeepHash targetHash = SSDeepHash.parse(testData[i][1]); + int score = scorer.apply(queryHash, targetHash); + Assert.assertEquals("Expected score of " + expectedScores[i] + " for query: " + queryHash + ", target: " + targetHash, expectedScores[i], score); + } + } +} From 363167d2cf4a0cd8a2b73b729111ca61ffa0868c Mon Sep 17 00:00:00 2001 From: Ivan Bella Date: Fri, 27 Oct 2023 07:12:41 -0400 Subject: [PATCH 04/32] Fixing the composite query logic to handle long running queries (#2147) * Fixing the composite query logic to handle long running queries * Do not pass EmptyObjectExceptions all the way through the composite query logic This was resulting in was too many empty pages * Updated to rely on RunningQuery to handle intermediate results for long running queries in the CompositeQueryLogic Conflicts: web-services/query/src/main/java/datawave/webservice/query/logic/composite/CompositeQueryLogic.java web-services/query/src/main/java/datawave/webservice/query/logic/composite/CompositeQueryLogicResultsIterator.java Conflicts: web-services/query/src/main/java/datawave/webservice/query/logic/composite/CompositeQueryLogicResultsIterator.java --- .../query/transformer/GroupingTransform.java | 8 ++++ .../query/util/ssdeep/ChunkSizeEncoding.java | 30 ++++++------- .../query/util/ssdeep/IntegerEncoding.java | 30 ++++++------- .../query/util/ssdeep/SSDeepEncoding.java | 2 +- .../logic/composite/CompositeQueryLogic.java | 42 ++++++++++++++++--- .../CompositeQueryLogicResultsIterator.java | 7 +--- .../composite/CompositeQueryLogicTest.java | 32 ++++++++++++-- 7 files changed, 105 insertions(+), 46 deletions(-) diff --git a/warehouse/query-core/src/main/java/datawave/query/transformer/GroupingTransform.java b/warehouse/query-core/src/main/java/datawave/query/transformer/GroupingTransform.java index e94fe93e6c..1fe8ea283b 100644 --- a/warehouse/query-core/src/main/java/datawave/query/transformer/GroupingTransform.java +++ 
b/warehouse/query-core/src/main/java/datawave/query/transformer/GroupingTransform.java @@ -134,6 +134,8 @@ public Entry apply(@Nullable Entry keyDocumentEntry) long elapsedExecutionTimeForCurrentPage = System.currentTimeMillis() - this.queryExecutionForPageStartTime; if (elapsedExecutionTimeForCurrentPage > this.queryExecutionForPageTimeout) { + log.debug("Generating intermediate result because over {}ms has been reached since {}", this.queryExecutionForPageTimeout, + this.queryExecutionForPageStartTime); Document intermediateResult = new Document(); intermediateResult.setIntermediateResult(true); return Maps.immutableEntry(new Key(), intermediateResult); @@ -142,6 +144,12 @@ public Entry apply(@Nullable Entry keyDocumentEntry) return null; } + @Override + public void setQueryExecutionForPageStartTime(long queryExecutionForPageStartTime) { + log.debug("setting query execution page start time to {}", queryExecutionForPageStartTime); + super.setQueryExecutionForPageStartTime(queryExecutionForPageStartTime); + } + @Override public Entry flush() { Document document = null; diff --git a/warehouse/query-core/src/main/java/datawave/query/util/ssdeep/ChunkSizeEncoding.java b/warehouse/query-core/src/main/java/datawave/query/util/ssdeep/ChunkSizeEncoding.java index 2545d2f34a..9cb397be20 100644 --- a/warehouse/query-core/src/main/java/datawave/query/util/ssdeep/ChunkSizeEncoding.java +++ b/warehouse/query-core/src/main/java/datawave/query/util/ssdeep/ChunkSizeEncoding.java @@ -26,18 +26,18 @@ */ //@formatter:on public class ChunkSizeEncoding implements Serializable { - + private static final int MIN_CHUNK_SIZE = 3; private static final int DEFAULT_ENCODING_ALPHABET_LENGTH = HashReverse.LEXICAL_B64_TABLE.length; - + private static final int DEFAULT_ENCODING_LENGTH = 1; - + static final double L2 = Math.log(2); - + private final IntegerEncoding chunkIndexEncoding; - + final int minChunkSize; - + /** * Create a ChunkSizeEncoding with the default parameters of a 64 character encoding alphabet and a length of 1. This allows us to encode 64 distinct chunk * index values. Chunk index 0 represents the MIN_CHUNK_SIZE. See class javadocs for more info. 
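As a quick worked example of the chunk size encoding described in the javadoc above, here is a minimal sketch (not part of the patch; the example values are assumptions) that only exercises the methods visible in this diff: with the defaults, chunk index i corresponds to chunk size 3 * 2^i, and the index is written as a single character of the lexically sorted Base64 alphabet.

    import datawave.query.util.ssdeep.ChunkSizeEncoding;

    public class ChunkSizeEncodingSketch {
        public static void main(String[] args) {
            ChunkSizeEncoding encoding = new ChunkSizeEncoding();    // defaults: minChunkSize = 3, 64-character alphabet, length 1
            System.out.println(encoding.findNthChunkSize(2));        // prints 12, i.e. 3 * 2^2
            System.out.println(encoding.findChunkSizeIndex(12));     // prints 2, i.e. log2(12 / 3)
            String encoded = encoding.encode(12);                    // a single character from the lexically sorted Base64 alphabet
            System.out.println(encoding.decode(encoded));            // prints 12 again, round-tripping the chunk size
        }
    }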
@@ -45,43 +45,43 @@ public class ChunkSizeEncoding implements Serializable { public ChunkSizeEncoding() { this(MIN_CHUNK_SIZE, DEFAULT_ENCODING_ALPHABET_LENGTH, DEFAULT_ENCODING_LENGTH); } - + public ChunkSizeEncoding(int minChunkSize, int encodingAlphabetLength, int encodingLength) { this.minChunkSize = minChunkSize; this.chunkIndexEncoding = new IntegerEncoding(encodingAlphabetLength, encodingLength); } - + public long getLimit() { return findChunkSizeIndex(chunkIndexEncoding.getLimit()); } - + public int getLength() { return chunkIndexEncoding.getLength(); } - + public long findNthChunkSize(int index) { return minChunkSize * ((long) Math.pow(2, index)); } - + public int findChunkSizeIndex(long chunkSize) { return (int) (Math.log(chunkSize / (float) minChunkSize) / L2); } - + public String encode(int chunkSize) { int index = findChunkSizeIndex(chunkSize); return chunkIndexEncoding.encode(index); } - + public byte[] encodeToBytes(int chunkSize, byte[] buffer, int offset) { int index = findChunkSizeIndex(chunkSize); return chunkIndexEncoding.encodeToBytes(index, buffer, offset); } - + public int decode(String encoded) { int index = chunkIndexEncoding.decode(encoded); return (int) findNthChunkSize(index); } - + public int decode(byte[] encoded, int offset) { int index = chunkIndexEncoding.decode(encoded, offset); return (int) findNthChunkSize(index); diff --git a/warehouse/query-core/src/main/java/datawave/query/util/ssdeep/IntegerEncoding.java b/warehouse/query-core/src/main/java/datawave/query/util/ssdeep/IntegerEncoding.java index 6f11163319..b7b76238ec 100644 --- a/warehouse/query-core/src/main/java/datawave/query/util/ssdeep/IntegerEncoding.java +++ b/warehouse/query-core/src/main/java/datawave/query/util/ssdeep/IntegerEncoding.java @@ -7,20 +7,20 @@ * Class for encoding integers into a lexically sorted output of constant length. Employs the sorted Base64 alphabet captured in the HashReverse class. */ public class IntegerEncoding implements Serializable { - + // The number of distinct characters used for encoding final int base; // the target length of the encoding final int length; // the max integer value we can encode, derived from the base and length parameters. final int limit; - + /** * We are using the LEXICAL_B64_TABLE to encode integers to characters, our max base (the unique characters we use for encoding) is based on the size of * this alphabet. */ private static final int MAX_BASE = HashReverse.LEXICAL_B64_TABLE.length; - + /** * Create an unsigned integer encoder that uses the specified base (up to 64) and length (which can't generate numbers larger than Integer.MAX_VALUE). This * uses the lexically sorted Base 64 alphabet for encoding. @@ -45,21 +45,21 @@ public IntegerEncoding(int base, int length) { } this.limit = (int) calculatedLimit; // truncation is fine here. 
} - + /** Return the maximum value this encoder can encode */ public int getLimit() { return limit; } - + public int getLength() { return length; } - + /** Encode the provided value, return a string result */ public String encode(int value) { return new String(encodeToBytes(value, new byte[length], 0)); } - + /** * encode the provided value, writing the result to the provided buffer starting offset * @@ -75,11 +75,11 @@ public byte[] encodeToBytes(int value, byte[] buffer, int offset) { if (value < 0 || value >= limit) { throw new IllegalArgumentException("Can't encode " + value + " is it out of range, max: " + limit + " was: " + value); } - + if (buffer.length < offset + length) { throw new IndexOutOfBoundsException("Can't encode a value of length " + length + " at offset " + offset + " buffer too small: " + buffer.length); } - + int remaining = value; for (int place = length; place > 0; place--) { final int scale = ((int) Math.pow(base, place - 1)); @@ -92,7 +92,7 @@ public byte[] encodeToBytes(int value, byte[] buffer, int offset) { } return buffer; } - + // TODO: make this just like encodeToBytes? public static byte[] encodeBaseTenDigitBytes(int value) { int remaining = value; @@ -108,7 +108,7 @@ public static byte[] encodeBaseTenDigitBytes(int value) { } return results; } - + /** * Decode the first _length_ characters in the encoded value into an integer, where length is specified in the constructor. * @@ -122,7 +122,7 @@ public int decode(String encodedValue) { } return decode(encodedValue.getBytes(StandardCharsets.UTF_8), 0); } - + /** * decode the value contained within the provided byte[] starting at the specified offset * @@ -140,7 +140,7 @@ public int decode(byte[] encoded, int offset) { if (encoded.length < offset + length) { throw new IndexOutOfBoundsException("Can't decode a value of length " + length + " from offset " + offset + " buffer too small: " + encoded.length); } - + int result = 0; for (int place = length; place > 0; place--) { int pos = offset + (length - place); @@ -150,11 +150,11 @@ public int decode(byte[] encoded, int offset) { } result += (int) Math.pow(base, place - 1) * value; } - + if (result > limit) { throw new IllegalArgumentException("Can't decode input is it out of range, max: " + limit + " was: " + result); } - + return result; } } diff --git a/warehouse/query-core/src/main/java/datawave/query/util/ssdeep/SSDeepEncoding.java b/warehouse/query-core/src/main/java/datawave/query/util/ssdeep/SSDeepEncoding.java index a067bd200a..955186cd2b 100644 --- a/warehouse/query-core/src/main/java/datawave/query/util/ssdeep/SSDeepEncoding.java +++ b/warehouse/query-core/src/main/java/datawave/query/util/ssdeep/SSDeepEncoding.java @@ -11,7 +11,7 @@ public class SSDeepEncoding implements Serializable { public byte[] encode(String ngram) { return encodeToBytes(ngram, new byte[ngram.length()], 0); } - + public byte[] encodeToBytes(String ngram, byte[] buffer, int offset) { for (int i = 0; i < ngram.length(); i++) { buffer[i + offset] = (byte) ngram.charAt(i); diff --git a/web-services/query/src/main/java/datawave/webservice/query/logic/composite/CompositeQueryLogic.java b/web-services/query/src/main/java/datawave/webservice/query/logic/composite/CompositeQueryLogic.java index 783524f1d2..f689fa7a80 100644 --- a/web-services/query/src/main/java/datawave/webservice/query/logic/composite/CompositeQueryLogic.java +++ b/web-services/query/src/main/java/datawave/webservice/query/logic/composite/CompositeQueryLogic.java @@ -36,6 +36,7 @@ import 
datawave.webservice.query.logic.BaseQueryLogic; import datawave.webservice.query.logic.QueryLogic; import datawave.webservice.query.logic.QueryLogicTransformer; +import datawave.webservice.query.result.event.EventBase; import datawave.webservice.result.BaseResponse; /** @@ -121,20 +122,38 @@ public void run() { started = true; } + // ensure we start with a reasonable page time + resetPageProcessingStartTime(); + // the results queue is also an exception handler setUncaughtExceptionHandler(results); boolean success = false; try { Object last = new Object(); - if (this.getMaxResults() < 0) + if (this.getMaxResults() <= 0) this.setMaxResults(Long.MAX_VALUE); while ((null != last) && !interrupted && transformIterator.hasNext() && (resultCount < this.getMaxResults())) { try { last = transformIterator.next(); if (null != last) { - log.debug(Thread.currentThread().getName() + ": Added object to results"); - results.add(last); + log.debug(Thread.currentThread().getName() + ": Got result"); + + // special logic to deal with intermediate results + if (last instanceof EventBase && ((EventBase) last).isIntermediateResult()) { + resetPageProcessingStartTime(); + // reset the page processing time to avoid getting spammed with these + // let the RunningQuery handle timeouts for long-running queries + if (isLongRunningQuery()) { + last = null; + } + } + + if (last != null) { + results.add(last); + resultCount++; + log.debug(Thread.currentThread().getName() + ": Added result to queue"); + } } } catch (InterruptedException e) { // if this was on purpose, then just log and the loop will naturally exit @@ -146,10 +165,8 @@ public void run() { throw new RuntimeException(e); } } catch (EmptyObjectException eoe) { - // Adding an empty object exception to the results queue needs to be passed all the way out. 
- results.add(eoe); + // ignore these } - resultCount++; } success = true; } catch (Exception e) { @@ -162,6 +179,9 @@ public void run() { } } + public void resetPageProcessingStartTime() { + logic.setPageProcessingStartTime(System.currentTimeMillis()); + } } protected static final Logger log = Logger.getLogger(CompositeQueryLogic.class); @@ -615,6 +635,16 @@ public void setPageProcessingStartTime(long pageProcessingStartTime) { } } + @Override + public boolean isLongRunningQuery() { + for (QueryLogic l : getQueryLogics().values()) { + if (l.isLongRunningQuery()) { + return true; + } + } + return false; + } + public boolean isAllMustInitialize() { return getConfig().isAllMustInitialize(); } diff --git a/web-services/query/src/main/java/datawave/webservice/query/logic/composite/CompositeQueryLogicResultsIterator.java b/web-services/query/src/main/java/datawave/webservice/query/logic/composite/CompositeQueryLogicResultsIterator.java index b35c6ccb96..bb412ce53b 100644 --- a/web-services/query/src/main/java/datawave/webservice/query/logic/composite/CompositeQueryLogicResultsIterator.java +++ b/web-services/query/src/main/java/datawave/webservice/query/logic/composite/CompositeQueryLogicResultsIterator.java @@ -67,9 +67,7 @@ public boolean hasNext() { } } if (nextEntry != null) { - if (!(nextEntry instanceof EmptyObjectException)) { - seenEntries = true; - } + seenEntries = true; return true; } return false; @@ -88,9 +86,6 @@ public Object next() { nextEntry = null; } } - if (current instanceof EmptyObjectException) { - throw new EmptyObjectException(); - } return current; } diff --git a/web-services/query/src/test/java/datawave/webservice/query/logic/composite/CompositeQueryLogicTest.java b/web-services/query/src/test/java/datawave/webservice/query/logic/composite/CompositeQueryLogicTest.java index fdce9e9203..cdc7b152b4 100644 --- a/web-services/query/src/test/java/datawave/webservice/query/logic/composite/CompositeQueryLogicTest.java +++ b/web-services/query/src/test/java/datawave/webservice/query/logic/composite/CompositeQueryLogicTest.java @@ -397,6 +397,11 @@ public QueryLogicTransformer getTransformer(Query settings) { public GenericQueryConfiguration initialize(AccumuloClient client, Query settings, Set runtimeQueryAuthorizations) throws Exception { return new TestQueryConfiguration(); } + + @Override + public boolean isLongRunningQuery() { + return true; + } } public static class DifferentTestQueryLogic extends BaseQueryLogic> { @@ -1145,7 +1150,7 @@ public void testQueryLogicWithMaxResultsOverride() throws Exception { CompositeQueryLogic c = new CompositeQueryLogic(); // max.results.override is set to -1 when it is not passed in as it is an optional parameter - logic1.setMaxResults(0); + logic1.setMaxResults(1); logic2.setMaxResults(4); /** * RunningQuery.setupConnection() @@ -1167,14 +1172,14 @@ public void testQueryLogicWithMaxResultsOverride() throws Exception { Assert.assertTrue(o instanceof TestQueryResponse); results.add(o); } - Assert.assertEquals(4, results.size()); + Assert.assertEquals(5, results.size()); ResultsPage page = new ResultsPage(results, Status.COMPLETE); /** * QueryExecutorBean.next() - transform list of objects into JAXB response */ TestQueryResponseList response = (TestQueryResponseList) c.getEnrichedTransformer((Query) settings).createResponse(page); - Assert.assertEquals(4, response.getResponses().size()); + Assert.assertEquals(5, response.getResponses().size()); for (TestQueryResponse r : response.getResponses()) { Assert.assertNotNull(r); } @@ -1426,6 
+1431,27 @@ public void testCannotRunQueryLogic2() throws Exception { } + @Test + public void testIsLongRunningQuery() throws Exception { + Map> logics = new HashMap<>(); + TestQueryLogic logic1 = new TestQueryLogic(); + TestQueryLogic logic2 = new TestQueryLogic(); + logics.put("TestQueryLogic", logic1); + logics.put("TestQueryLogic2", logic2); + + CompositeQueryLogic c = new CompositeQueryLogic(); + c.setQueryLogics(logics); + + Assert.assertFalse(c.isLongRunningQuery()); + + TestQueryLogic2 logic3 = new TestQueryLogic2(); + logics.put("TestQueryLogic3", logic3); + + c.setQueryLogics(logics); + + Assert.assertTrue(c.isLongRunningQuery()); + } + @Test public void testAuthorizationsUpdate() throws Exception { Map> logics = new HashMap<>(); From c5d0de145ed7c8f648355dc63d90ddba6ffe7d1f Mon Sep 17 00:00:00 2001 From: austin007008 <143425397+austin007008@users.noreply.github.com> Date: Fri, 3 Nov 2023 08:37:02 -0400 Subject: [PATCH 05/32] fix integration (#2157) import sort --- .../src/main/java/datawave/query/tld/TLD.java | 3 +- .../query/util/ssdeep/ChunkSizeEncoding.java | 30 ++++++++-------- .../query/util/ssdeep/IntegerEncoding.java | 30 ++++++++-------- .../query/util/ssdeep/SSDeepEncoding.java | 2 +- .../query/util/ssdeep/SSDeepHashScorer.java | 36 +++++++++---------- .../SSDeepSimilarityQueryTransformerTest.java | 4 +-- 6 files changed, 53 insertions(+), 52 deletions(-) diff --git a/warehouse/query-core/src/main/java/datawave/query/tld/TLD.java b/warehouse/query-core/src/main/java/datawave/query/tld/TLD.java index 8b5066223e..b779dd3f17 100644 --- a/warehouse/query-core/src/main/java/datawave/query/tld/TLD.java +++ b/warehouse/query-core/src/main/java/datawave/query/tld/TLD.java @@ -28,7 +28,8 @@ private TLD() {} /** * Parses the pointer (document id) from the local Field Index key's ColumnQualifier - * + *

+ * <p>
+ *
* FI Key Structure (row, cf='fi\0field', cq='value\0datatype\0uid') * * The uid is starts at the ColumnQualifier's second null byte, ends at the end of sequence. diff --git a/warehouse/query-core/src/main/java/datawave/query/util/ssdeep/ChunkSizeEncoding.java b/warehouse/query-core/src/main/java/datawave/query/util/ssdeep/ChunkSizeEncoding.java index 9cb397be20..2545d2f34a 100644 --- a/warehouse/query-core/src/main/java/datawave/query/util/ssdeep/ChunkSizeEncoding.java +++ b/warehouse/query-core/src/main/java/datawave/query/util/ssdeep/ChunkSizeEncoding.java @@ -26,18 +26,18 @@ */ //@formatter:on public class ChunkSizeEncoding implements Serializable { - + private static final int MIN_CHUNK_SIZE = 3; private static final int DEFAULT_ENCODING_ALPHABET_LENGTH = HashReverse.LEXICAL_B64_TABLE.length; - + private static final int DEFAULT_ENCODING_LENGTH = 1; - + static final double L2 = Math.log(2); - + private final IntegerEncoding chunkIndexEncoding; - + final int minChunkSize; - + /** * Create a ChunkSizeEncoding with the default parameters of a 64 character encoding alphabet and a length of 1. This allows us to encode 64 distinct chunk * index values. Chunk index 0 represents the MIN_CHUNK_SIZE. See class javadocs for more info. @@ -45,43 +45,43 @@ public class ChunkSizeEncoding implements Serializable { public ChunkSizeEncoding() { this(MIN_CHUNK_SIZE, DEFAULT_ENCODING_ALPHABET_LENGTH, DEFAULT_ENCODING_LENGTH); } - + public ChunkSizeEncoding(int minChunkSize, int encodingAlphabetLength, int encodingLength) { this.minChunkSize = minChunkSize; this.chunkIndexEncoding = new IntegerEncoding(encodingAlphabetLength, encodingLength); } - + public long getLimit() { return findChunkSizeIndex(chunkIndexEncoding.getLimit()); } - + public int getLength() { return chunkIndexEncoding.getLength(); } - + public long findNthChunkSize(int index) { return minChunkSize * ((long) Math.pow(2, index)); } - + public int findChunkSizeIndex(long chunkSize) { return (int) (Math.log(chunkSize / (float) minChunkSize) / L2); } - + public String encode(int chunkSize) { int index = findChunkSizeIndex(chunkSize); return chunkIndexEncoding.encode(index); } - + public byte[] encodeToBytes(int chunkSize, byte[] buffer, int offset) { int index = findChunkSizeIndex(chunkSize); return chunkIndexEncoding.encodeToBytes(index, buffer, offset); } - + public int decode(String encoded) { int index = chunkIndexEncoding.decode(encoded); return (int) findNthChunkSize(index); } - + public int decode(byte[] encoded, int offset) { int index = chunkIndexEncoding.decode(encoded, offset); return (int) findNthChunkSize(index); diff --git a/warehouse/query-core/src/main/java/datawave/query/util/ssdeep/IntegerEncoding.java b/warehouse/query-core/src/main/java/datawave/query/util/ssdeep/IntegerEncoding.java index b7b76238ec..6f11163319 100644 --- a/warehouse/query-core/src/main/java/datawave/query/util/ssdeep/IntegerEncoding.java +++ b/warehouse/query-core/src/main/java/datawave/query/util/ssdeep/IntegerEncoding.java @@ -7,20 +7,20 @@ * Class for encoding integers into a lexically sorted output of constant length. Employs the sorted Base64 alphabet captured in the HashReverse class. */ public class IntegerEncoding implements Serializable { - + // The number of distinct characters used for encoding final int base; // the target length of the encoding final int length; // the max integer value we can encode, derived from the base and length parameters. 
final int limit; - + /** * We are using the LEXICAL_B64_TABLE to encode integers to characters, our max base (the unique characters we use for encoding) is based on the size of * this alphabet. */ private static final int MAX_BASE = HashReverse.LEXICAL_B64_TABLE.length; - + /** * Create an unsigned integer encoder that uses the specified base (up to 64) and length (which can't generate numbers larger than Integer.MAX_VALUE). This * uses the lexically sorted Base 64 alphabet for encoding. @@ -45,21 +45,21 @@ public IntegerEncoding(int base, int length) { } this.limit = (int) calculatedLimit; // truncation is fine here. } - + /** Return the maximum value this encoder can encode */ public int getLimit() { return limit; } - + public int getLength() { return length; } - + /** Encode the provided value, return a string result */ public String encode(int value) { return new String(encodeToBytes(value, new byte[length], 0)); } - + /** * encode the provided value, writing the result to the provided buffer starting offset * @@ -75,11 +75,11 @@ public byte[] encodeToBytes(int value, byte[] buffer, int offset) { if (value < 0 || value >= limit) { throw new IllegalArgumentException("Can't encode " + value + " is it out of range, max: " + limit + " was: " + value); } - + if (buffer.length < offset + length) { throw new IndexOutOfBoundsException("Can't encode a value of length " + length + " at offset " + offset + " buffer too small: " + buffer.length); } - + int remaining = value; for (int place = length; place > 0; place--) { final int scale = ((int) Math.pow(base, place - 1)); @@ -92,7 +92,7 @@ public byte[] encodeToBytes(int value, byte[] buffer, int offset) { } return buffer; } - + // TODO: make this just like encodeToBytes? public static byte[] encodeBaseTenDigitBytes(int value) { int remaining = value; @@ -108,7 +108,7 @@ public static byte[] encodeBaseTenDigitBytes(int value) { } return results; } - + /** * Decode the first _length_ characters in the encoded value into an integer, where length is specified in the constructor. 
* @@ -122,7 +122,7 @@ public int decode(String encodedValue) { } return decode(encodedValue.getBytes(StandardCharsets.UTF_8), 0); } - + /** * decode the value contained within the provided byte[] starting at the specified offset * @@ -140,7 +140,7 @@ public int decode(byte[] encoded, int offset) { if (encoded.length < offset + length) { throw new IndexOutOfBoundsException("Can't decode a value of length " + length + " from offset " + offset + " buffer too small: " + encoded.length); } - + int result = 0; for (int place = length; place > 0; place--) { int pos = offset + (length - place); @@ -150,11 +150,11 @@ public int decode(byte[] encoded, int offset) { } result += (int) Math.pow(base, place - 1) * value; } - + if (result > limit) { throw new IllegalArgumentException("Can't decode input is it out of range, max: " + limit + " was: " + result); } - + return result; } } diff --git a/warehouse/query-core/src/main/java/datawave/query/util/ssdeep/SSDeepEncoding.java b/warehouse/query-core/src/main/java/datawave/query/util/ssdeep/SSDeepEncoding.java index 955186cd2b..a067bd200a 100644 --- a/warehouse/query-core/src/main/java/datawave/query/util/ssdeep/SSDeepEncoding.java +++ b/warehouse/query-core/src/main/java/datawave/query/util/ssdeep/SSDeepEncoding.java @@ -11,7 +11,7 @@ public class SSDeepEncoding implements Serializable { public byte[] encode(String ngram) { return encodeToBytes(ngram, new byte[ngram.length()], 0); } - + public byte[] encodeToBytes(String ngram, byte[] buffer, int offset) { for (int i = 0; i < ngram.length(); i++) { buffer[i + offset] = (byte) ngram.charAt(i); diff --git a/warehouse/query-core/src/main/java/datawave/query/util/ssdeep/SSDeepHashScorer.java b/warehouse/query-core/src/main/java/datawave/query/util/ssdeep/SSDeepHashScorer.java index fce7cb5d66..428a0405c1 100644 --- a/warehouse/query-core/src/main/java/datawave/query/util/ssdeep/SSDeepHashScorer.java +++ b/warehouse/query-core/src/main/java/datawave/query/util/ssdeep/SSDeepHashScorer.java @@ -6,17 +6,17 @@ /** Implements functions to calculate a similarity score for a pair of SSDeepHashes */ public class SSDeepHashScorer { private static final Logger log = Logger.getLogger(SSDeepHash.class); - + private final int maxRepeatedCharacters; - + public SSDeepHashScorer() { this(SSDeepHash.DEFAULT_MAX_REPEATED_CHARACTERS); } - + public SSDeepHashScorer(int maxRepeatedCharacters) { this.maxRepeatedCharacters = maxRepeatedCharacters; } - + /** * Compare two ssdeep hashes, returning a score between 0 to 100 that indicates similarity. A score of 0 means that the items are not similar at all whereas * a score of 100 indicates a high degree of similarity. @@ -33,7 +33,7 @@ public int apply(SSDeepHash signature1, SSDeepHash signature2) { } final long chunkSize1 = signature1.getChunkSize(); final long chunkSize2 = signature2.getChunkSize(); - + // We require the chunk size to either be equal, or for one to be twice the other. If the chunk sizes don't // match then we are comparing apples to oranges. This isn't an 'error' per se. We could have two valid // ssdeep hashes, but with chunk sizes so different they can't be compared. @@ -43,14 +43,14 @@ public int apply(SSDeepHash signature1, SSDeepHash signature2) { } return 0; } - + // There is very little information content in sequences of the same character like 'LLLLL'. Eliminate any // sequences longer than MAX_REPEATED_CHARACTERS (3). 
final String s1chunk = SSDeepHash.normalizeSSDeepChunk(signature1.getChunk(), maxRepeatedCharacters); final String s1doubleChunk = SSDeepHash.normalizeSSDeepChunk(signature1.getDoubleChunk(), maxRepeatedCharacters); final String s2chunk = SSDeepHash.normalizeSSDeepChunk(signature2.getChunk(), maxRepeatedCharacters); final String s2doubleChunk = SSDeepHash.normalizeSSDeepChunk(signature2.getDoubleChunk(), maxRepeatedCharacters); - + // Each ssdeep has two chunks with different chunk sizes. Choose which ones to use from each hash for scoring. final long score; if (chunkSize1 == chunkSize2) { @@ -65,10 +65,10 @@ public int apply(SSDeepHash signature1, SSDeepHash signature2) { // The second ssdeep has twice the chunk size of the first. score = scoreChunks(s1doubleChunk, s2chunk, chunkSize2); } - + return (int) score; } - + /** * This is the low level chunk scoring algorithm. It takes two chunks and scores them on a scale of 0-100 where 0 is a terrible match and 100 is a great * match. The chunkSize is used to cope with very small messages. @@ -76,44 +76,44 @@ public int apply(SSDeepHash signature1, SSDeepHash signature2) { private static int scoreChunks(final String s1, final String s2, final long chunkSize) { final int len1 = s1.length(); final int len2 = s2.length(); - + if ((len1 > SSDeepHash.CHUNK_LENGTH) || (len2 > SSDeepHash.CHUNK_LENGTH)) { // one of the chunk lengths exceeds the max chunk length, perhaps it is not a real ssdeep? return 0; } - + // Compute the edit distance between the two chunk strings. The edit distance gives us a pretty good idea of // how closely related the two chunks are. int editDistance = LevenshteinDistance.getDefaultInstance().apply(s1, s2); if (log.isDebugEnabled()) { log.debug("edit_dist: " + editDistance); } - + // Scale the edit distance by the lengths of the two chunks. This changes the baseScore to be a measure of the // proportion of the message that has changed rather than an absolute quantity. It also copes with the // variability of the chunk string lengths. int score = (editDistance * SSDeepHash.CHUNK_LENGTH) / (len1 + len2); - + // At this stage the baseScore occurs roughly on a 0-64 scale, // with 0 being a good match and 64 being a complete mismatch. - + // Rescale to a 0-100 scale (friendlier to humans). score = (100 * score) / SSDeepHash.CHUNK_LENGTH; - + // It is possible to get a baseScore above 100 here, but it is a really terrible match. if (score >= 100) { return 0; } - + // Invert the score with 0 being a poor match and 100 being a excellent match. score = 100 - score; - + // When the chunk size is small we don't want to exaggerate the match. 
final int threshold = (int) (chunkSize / SSDeepHash.MIN_CHUNK_SIZE * Math.min(len1, len2)); if (score > threshold) { score = threshold; } - + return score; } } diff --git a/warehouse/query-core/src/test/java/datawave/query/transformer/SSDeepSimilarityQueryTransformerTest.java b/warehouse/query-core/src/test/java/datawave/query/transformer/SSDeepSimilarityQueryTransformerTest.java index 4d2399b7e4..b5b461e1c3 100644 --- a/warehouse/query-core/src/test/java/datawave/query/transformer/SSDeepSimilarityQueryTransformerTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/transformer/SSDeepSimilarityQueryTransformerTest.java @@ -6,7 +6,6 @@ import java.util.List; import java.util.Map; -import datawave.webservice.query.QueryImpl; import org.apache.accumulo.core.data.Key; import org.apache.accumulo.core.data.Value; import org.easymock.EasyMock; @@ -29,6 +28,7 @@ import datawave.query.util.ssdeep.NGramTuple; import datawave.query.util.ssdeep.SSDeepHash; import datawave.webservice.query.Query; +import datawave.webservice.query.QueryImpl; import datawave.webservice.query.result.event.DefaultEvent; import datawave.webservice.query.result.event.DefaultField; import datawave.webservice.query.result.event.ResponseObjectFactory; @@ -53,7 +53,7 @@ public class SSDeepSimilarityQueryTransformerTest { public void basicExpects(Key k) { EasyMock.expect(mockQuery.getQueryAuthorizations()).andReturn("A,B,C"); - EasyMock.expect(mockQuery.findParameter("minScore")).andReturn(new QueryImpl.Parameter("minScore","")); + EasyMock.expect(mockQuery.findParameter("minScore")).andReturn(new QueryImpl.Parameter("minScore", "")); EasyMock.expect(mockResponseFactory.getEventQueryResponse()).andReturn(new DefaultEventQueryResponse()); EasyMock.expect(mockResponseFactory.getEvent()).andReturn(new DefaultEvent()).times(1); EasyMock.expect(mockResponseFactory.getField()).andReturn(new DefaultField()).times(4); From 7299f95047ea983e53e97ee44a70a80d2828fe79 Mon Sep 17 00:00:00 2001 From: Ivan Bella Date: Tue, 17 Oct 2023 08:42:29 -0400 Subject: [PATCH 06/32] Adding a short lived cache around the remote user operations (#2128) --- warehouse/core/src/main/resources/CacheContext.xml | 3 +-- .../authorization/remote/RemoteUserOperationsImpl.java | 7 +++++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/warehouse/core/src/main/resources/CacheContext.xml b/warehouse/core/src/main/resources/CacheContext.xml index 400bfc9e8b..a8ae9dc4c8 100644 --- a/warehouse/core/src/main/resources/CacheContext.xml +++ b/warehouse/core/src/main/resources/CacheContext.xml @@ -22,8 +22,7 @@ - + - diff --git a/web-services/security/src/main/java/datawave/security/authorization/remote/RemoteUserOperationsImpl.java b/web-services/security/src/main/java/datawave/security/authorization/remote/RemoteUserOperationsImpl.java index c17655c8a0..d782a03c07 100644 --- a/web-services/security/src/main/java/datawave/security/authorization/remote/RemoteUserOperationsImpl.java +++ b/web-services/security/src/main/java/datawave/security/authorization/remote/RemoteUserOperationsImpl.java @@ -50,6 +50,13 @@ public void init() { } @Override + @Cacheable(value = "getRemoteUser", key = "{#principal}", cacheManager = "remoteUserOperationsCacheManager") + public DatawavePrincipal getRemoteUser(DatawavePrincipal principal) throws AuthorizationException { + return UserOperations.super.getRemoteUser(principal); + } + + @Override + @Cacheable(value = "listEffectiveAuthorizations", key = "{#callerObject}", cacheManager = "remoteUserOperationsCacheManager") 
public AuthorizationsListBase listEffectiveAuthorizations(Object callerObject) throws AuthorizationException { init(); final DatawavePrincipal principal = getDatawavePrincipal(callerObject); From 1a82e8b364cf96e4b2ef4604418c8901a15ddb66 Mon Sep 17 00:00:00 2001 From: Ivan Bella Date: Mon, 6 Nov 2023 13:52:09 +0000 Subject: [PATCH 07/32] resolve merge conflict for short lived remote user operations cache --- .../authorization/remote/RemoteUserOperationsImpl.java | 6 ------ 1 file changed, 6 deletions(-) diff --git a/web-services/security/src/main/java/datawave/security/authorization/remote/RemoteUserOperationsImpl.java b/web-services/security/src/main/java/datawave/security/authorization/remote/RemoteUserOperationsImpl.java index d782a03c07..51c976e2ed 100644 --- a/web-services/security/src/main/java/datawave/security/authorization/remote/RemoteUserOperationsImpl.java +++ b/web-services/security/src/main/java/datawave/security/authorization/remote/RemoteUserOperationsImpl.java @@ -48,12 +48,6 @@ public void init() { initialized = true; } } - - @Override - @Cacheable(value = "getRemoteUser", key = "{#principal}", cacheManager = "remoteUserOperationsCacheManager") - public DatawavePrincipal getRemoteUser(DatawavePrincipal principal) throws AuthorizationException { - return UserOperations.super.getRemoteUser(principal); - } @Override @Cacheable(value = "listEffectiveAuthorizations", key = "{#callerObject}", cacheManager = "remoteUserOperationsCacheManager") From 32fc6df81b68687a99600918fbd728aa52588657 Mon Sep 17 00:00:00 2001 From: Ivan Bella Date: Mon, 6 Nov 2023 19:56:49 +0000 Subject: [PATCH 08/32] formatting --- .../security/authorization/remote/RemoteUserOperationsImpl.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web-services/security/src/main/java/datawave/security/authorization/remote/RemoteUserOperationsImpl.java b/web-services/security/src/main/java/datawave/security/authorization/remote/RemoteUserOperationsImpl.java index 51c976e2ed..e7a497c294 100644 --- a/web-services/security/src/main/java/datawave/security/authorization/remote/RemoteUserOperationsImpl.java +++ b/web-services/security/src/main/java/datawave/security/authorization/remote/RemoteUserOperationsImpl.java @@ -48,7 +48,7 @@ public void init() { initialized = true; } } - + @Override @Cacheable(value = "listEffectiveAuthorizations", key = "{#callerObject}", cacheManager = "remoteUserOperationsCacheManager") public AuthorizationsListBase listEffectiveAuthorizations(Object callerObject) throws AuthorizationException { From 5e6c0cfa962866684eb9c40895a66561769b8656 Mon Sep 17 00:00:00 2001 From: Whitney O'Meara Date: Tue, 7 Nov 2023 15:00:47 +0000 Subject: [PATCH 09/32] Updated the audit service submodule --- microservices/services/audit | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/microservices/services/audit b/microservices/services/audit index e3c3e32e94..ea0f0cefd9 160000 --- a/microservices/services/audit +++ b/microservices/services/audit @@ -1 +1 @@ -Subproject commit e3c3e32e94df3193ab33e9877609806f9c2a3079 +Subproject commit ea0f0cefd9974cc7ec6efe359d0005196b3eb797 From 73885fb9e8e8b8875885dfe369d93fa508e06204 Mon Sep 17 00:00:00 2001 From: Laura Schanno Date: Mon, 13 Nov 2023 13:03:01 -0500 Subject: [PATCH 10/32] Add aggregation functionality for #GROUP_BY function (#1914) * Add aggregation functionality for #GROUP_BY function * Provide the ability to aggregate fields when grouping. 
Specifically, add the new functions #SUM, #MIN, #MAX, #COUNT, #AVERAGE that will determine the aggregate value for any specified fields against the entries that match any particular grouping. Additionally, add GroupAggregateFields to act as a central place to * Updated to specify a reverse model mapping separate from the inverseModelMap to ensure unique reverse mappings * Reduce the fields to be the mapped set of fields and avoid remapping the fields everywhere that we do not need to. --------- Co-authored-by: Ivan Bella --- .../main/java/datawave/query/Constants.java | 8 + .../java/datawave/query/QueryParameters.java | 29 + .../common/grouping/AbstractAggregator.java | 46 + .../common/grouping/AggregateOperation.java | 8 + .../query/common/grouping/Aggregator.java | 81 ++ .../common/grouping/AverageAggregator.java | 157 ++ .../common/grouping/CountAggregator.java | 103 ++ .../common/grouping/DocumentGrouper.java | 717 ++++++++++ .../datawave/query/common/grouping/Field.java | 124 ++ .../common/grouping/FieldAggregator.java | 404 ++++++ .../datawave/query/common/grouping/Group.java | 168 +++ .../query/common/grouping/GroupFields.java | 466 ++++++ .../query/common/grouping/Grouping.java | 118 ++ .../common/grouping/GroupingAttribute.java | 62 + .../query/common/grouping/GroupingUtil.java | 330 ----- .../query/common/grouping/GroupingUtils.java | 267 ++++ .../query/common/grouping/Groups.java | 108 ++ .../common/grouping/ImmutableGrouping.java | 95 ++ .../query/common/grouping/MaxAggregator.java | 110 ++ .../query/common/grouping/MinAggregator.java | 113 ++ .../query/common/grouping/SumAggregator.java | 117 ++ .../query/config/ShardQueryConfiguration.java | 34 +- .../query/iterator/GroupingIterator.java | 118 +- .../query/iterator/QueryIterator.java | 4 +- .../datawave/query/iterator/QueryOptions.java | 14 +- .../query/jexl/functions/QueryFunctions.java | 7 +- .../functions/QueryFunctionsDescriptor.java | 5 + .../QueryOptionsFromQueryVisitor.java | 33 +- .../language/functions/jexl/Average.java | 48 + .../query/language/functions/jexl/Count.java | 48 + .../query/language/functions/jexl/Max.java | 48 + .../query/language/functions/jexl/Min.java | 48 + .../query/language/functions/jexl/Sum.java | 48 + .../query/planner/DefaultQueryPlanner.java | 28 +- .../query/planner/QueryOptionsSwitch.java | 61 +- .../query/tables/ShardQueryLogic.java | 73 +- .../query/transformer/GroupingTransform.java | 97 +- .../grouping/AverageAggregatorTest.java | 73 + .../common/grouping/CountAggregatorTest.java | 52 + .../common/grouping/DocumentGrouperTest.java | 1100 ++++++++++++++ .../common/grouping/GroupFieldsTest.java | 247 ++++ .../common/grouping/MaxAggregatorTest.java | 123 ++ .../common/grouping/MinAggregatorTest.java | 119 ++ .../common/grouping/SumAggregatorTest.java | 74 + .../config/ShardQueryConfigurationTest.java | 8 +- .../query/transformer/GroupingTest.java | 1262 +++++++++++------ .../transformer/GroupingTestWithModel.java | 534 ------- .../query/util/VisibilityWiseGuysIngest.java | 98 +- .../VisibilityWiseGuysIngestWithModel.java | 98 +- .../test/java/datawave/test/GroupAssert.java | 113 ++ .../test/java/datawave/test/GroupsAssert.java | 42 + .../datawave/query/QueryLogicFactory.xml | 5 + .../datawave/query/QueryLogicFactory.xml | 5 + 53 files changed, 6656 insertions(+), 1642 deletions(-) create mode 100644 warehouse/query-core/src/main/java/datawave/query/common/grouping/AbstractAggregator.java create mode 100644 
warehouse/query-core/src/main/java/datawave/query/common/grouping/AggregateOperation.java create mode 100644 warehouse/query-core/src/main/java/datawave/query/common/grouping/Aggregator.java create mode 100644 warehouse/query-core/src/main/java/datawave/query/common/grouping/AverageAggregator.java create mode 100644 warehouse/query-core/src/main/java/datawave/query/common/grouping/CountAggregator.java create mode 100644 warehouse/query-core/src/main/java/datawave/query/common/grouping/DocumentGrouper.java create mode 100644 warehouse/query-core/src/main/java/datawave/query/common/grouping/Field.java create mode 100644 warehouse/query-core/src/main/java/datawave/query/common/grouping/FieldAggregator.java create mode 100644 warehouse/query-core/src/main/java/datawave/query/common/grouping/Group.java create mode 100644 warehouse/query-core/src/main/java/datawave/query/common/grouping/GroupFields.java create mode 100644 warehouse/query-core/src/main/java/datawave/query/common/grouping/Grouping.java create mode 100644 warehouse/query-core/src/main/java/datawave/query/common/grouping/GroupingAttribute.java delete mode 100644 warehouse/query-core/src/main/java/datawave/query/common/grouping/GroupingUtil.java create mode 100644 warehouse/query-core/src/main/java/datawave/query/common/grouping/GroupingUtils.java create mode 100644 warehouse/query-core/src/main/java/datawave/query/common/grouping/Groups.java create mode 100644 warehouse/query-core/src/main/java/datawave/query/common/grouping/ImmutableGrouping.java create mode 100644 warehouse/query-core/src/main/java/datawave/query/common/grouping/MaxAggregator.java create mode 100644 warehouse/query-core/src/main/java/datawave/query/common/grouping/MinAggregator.java create mode 100644 warehouse/query-core/src/main/java/datawave/query/common/grouping/SumAggregator.java create mode 100644 warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/Average.java create mode 100644 warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/Count.java create mode 100644 warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/Max.java create mode 100644 warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/Min.java create mode 100644 warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/Sum.java create mode 100644 warehouse/query-core/src/test/java/datawave/query/common/grouping/AverageAggregatorTest.java create mode 100644 warehouse/query-core/src/test/java/datawave/query/common/grouping/CountAggregatorTest.java create mode 100644 warehouse/query-core/src/test/java/datawave/query/common/grouping/DocumentGrouperTest.java create mode 100644 warehouse/query-core/src/test/java/datawave/query/common/grouping/GroupFieldsTest.java create mode 100644 warehouse/query-core/src/test/java/datawave/query/common/grouping/MaxAggregatorTest.java create mode 100644 warehouse/query-core/src/test/java/datawave/query/common/grouping/MinAggregatorTest.java create mode 100644 warehouse/query-core/src/test/java/datawave/query/common/grouping/SumAggregatorTest.java delete mode 100644 warehouse/query-core/src/test/java/datawave/query/transformer/GroupingTestWithModel.java create mode 100644 warehouse/query-core/src/test/java/datawave/test/GroupAssert.java create mode 100644 warehouse/query-core/src/test/java/datawave/test/GroupsAssert.java diff --git a/warehouse/query-core/src/main/java/datawave/query/Constants.java b/warehouse/query-core/src/main/java/datawave/query/Constants.java index 
bf1141a76e..c1ab9ff4f5 100644 --- a/warehouse/query-core/src/main/java/datawave/query/Constants.java +++ b/warehouse/query-core/src/main/java/datawave/query/Constants.java @@ -30,8 +30,16 @@ public class Constants { public static final String BRACKET_END = "]"; + public static final String EQUALS = "="; + public static final String FORWARD_SLASH = "/"; + public static final String LEFT_PAREN = "("; + + public static final String RIGHT_PAREN = ")"; + + public static final String PIPE = "|"; + public static final Text TEXT_NULL = new Text(NULL); public static final Text FI_PREFIX = new Text("fi"); diff --git a/warehouse/query-core/src/main/java/datawave/query/QueryParameters.java b/warehouse/query-core/src/main/java/datawave/query/QueryParameters.java index a0f0e84d34..8a2ce76e09 100644 --- a/warehouse/query-core/src/main/java/datawave/query/QueryParameters.java +++ b/warehouse/query-core/src/main/java/datawave/query/QueryParameters.java @@ -142,7 +142,36 @@ public class QueryParameters { */ public static final String MATCHING_FIELD_SETS = "matching.field.sets"; + /** + * Used to specify fields to perform a group-by with. + */ public static final String GROUP_FIELDS = "group.fields"; + + /** + * Used to specify the fields for which a sum should be calculated in groups resulting from a group-by operation. + */ + public static final String SUM_FIELDS = "sum.fields"; + + /** + * Used to specify the fields for which the max should be found in groups resulting from a group-by operation. + */ + public static final String MAX_FIELDS = "max.fields"; + + /** + * Used to specify the fields for which the min should be found in groups resulting from a group-by operation. + */ + public static final String MIN_FIELDS = "min.fields"; + + /** + * Used to specify the fields for which a count should be calculated in groups resulting from a group-by operation. + */ + public static final String COUNT_FIELDS = "count.fields"; + + /** + * Used to specify the fields for which an average should be calculated in groups resulting from a group-by operation. + */ + public static final String AVERAGE_FIELDS = "average.fields"; + public static final String GROUP_FIELDS_BATCH_SIZE = "group.fields.batch.size"; public static final String UNIQUE_FIELDS = "unique.fields"; diff --git a/warehouse/query-core/src/main/java/datawave/query/common/grouping/AbstractAggregator.java b/warehouse/query-core/src/main/java/datawave/query/common/grouping/AbstractAggregator.java new file mode 100644 index 0000000000..97f5a176a9 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/common/grouping/AbstractAggregator.java @@ -0,0 +1,46 @@ +package datawave.query.common.grouping; + +import java.util.Set; + +import org.apache.accumulo.core.security.ColumnVisibility; + +import datawave.query.attributes.Attribute; + +/** + * Abstract implementation of {@link Aggregator} + * + * @param + * the aggregation result type + */ +public abstract class AbstractAggregator implements Aggregator { + + /** + * The name of the field being aggregated. 
+ */ + protected final String field; + + protected AbstractAggregator(String field) { + this.field = field; + } + + @Override + public abstract AggregateOperation getOperation(); + + @Override + public String getField() { + return this.field; + } + + @Override + public abstract Set getColumnVisibilities(); + + @Override + public abstract AGGREGATE getAggregation(); + + @Override + public abstract void aggregate(Attribute value); + + @Override + public abstract void merge(Aggregator other); + +} diff --git a/warehouse/query-core/src/main/java/datawave/query/common/grouping/AggregateOperation.java b/warehouse/query-core/src/main/java/datawave/query/common/grouping/AggregateOperation.java new file mode 100644 index 0000000000..6cf2d0e2ad --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/common/grouping/AggregateOperation.java @@ -0,0 +1,8 @@ +package datawave.query.common.grouping; + +/** + * Represents the aggregation operations currently available. + */ +public enum AggregateOperation { + SUM, MAX, MIN, COUNT, AVERAGE +} diff --git a/warehouse/query-core/src/main/java/datawave/query/common/grouping/Aggregator.java b/warehouse/query-core/src/main/java/datawave/query/common/grouping/Aggregator.java new file mode 100644 index 0000000000..ec4768e5ed --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/common/grouping/Aggregator.java @@ -0,0 +1,81 @@ +package datawave.query.common.grouping; + +import java.util.Collection; +import java.util.List; +import java.util.Set; + +import org.apache.accumulo.core.security.ColumnVisibility; + +import datawave.query.attributes.Attribute; + +/** + * Provides the methods by which aggregates can be calculated for fields when grouped by other fields. + * + * @param + * the aggregate result type + */ +public interface Aggregator { + + /** + * Return the aggregate operation being performed. + * + * @return the aggregate operation + */ + AggregateOperation getOperation(); + + /** + * Return the field being aggregated. + * + * @return the field + */ + String getField(); + + /** + * Returns an unmodifiable set of all distinct column visibilities for each attribute aggregated into this aggregator. Possibly empty, but never null. + * + * @return a set of the column visibilities + */ + Set getColumnVisibilities(); + + /** + * Return the aggregation result. + * + * @return the aggregation + */ + AGGREGATE getAggregation(); + + /** + * Return true if this aggregator has aggregated at least one attribute. + * + * @return true if this aggregator has at least one attribute aggregated to it, or false otherwise + */ + boolean hasAggregation(); + + /** + * Aggregate the given value into this aggregator. + * + * @param value + * the value to aggregate + */ + void aggregate(Attribute value); + + /** + * Aggregate each of the given values into this aggregator. 
+ * + * @param values + * the value to aggregate + */ + default void aggregateAll(Collection> values) { + values.forEach(this::aggregate); + } + + /** + * Merges the given aggregator into this aggregator + * + * @param other + * the aggregator to merge + * @throws IllegalArgumentException + * if the other aggregator is not the same type as this aggregator + */ + void merge(Aggregator other); +} diff --git a/warehouse/query-core/src/main/java/datawave/query/common/grouping/AverageAggregator.java b/warehouse/query-core/src/main/java/datawave/query/common/grouping/AverageAggregator.java new file mode 100644 index 0000000000..a717ddd5cd --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/common/grouping/AverageAggregator.java @@ -0,0 +1,157 @@ +package datawave.query.common.grouping; + +import java.math.BigDecimal; +import java.math.MathContext; +import java.math.RoundingMode; +import java.util.Collections; +import java.util.HashSet; +import java.util.Set; + +import org.apache.accumulo.core.security.ColumnVisibility; +import org.apache.commons.lang.builder.ToStringBuilder; + +import datawave.query.attributes.Attribute; +import datawave.query.attributes.Numeric; +import datawave.query.attributes.TypeAttribute; + +/** + * Calculates the average value of aggregated field values. This is limited to fields for which their values can be parsed as {@link BigDecimal} instances. + */ +public class AverageAggregator extends AbstractAggregator { + + private static final MathContext MATH_CONTEXT = new MathContext(10, RoundingMode.HALF_UP); + + /** + * The current numerator value of the average. + */ + private BigDecimal numerator; + + /** + * The current divisor value of the average. + */ + private BigDecimal divisor; + + /** + * The current average value. + */ + private BigDecimal average; + + /** + * The column visibilities of all attributes aggregated. + */ + private final Set columnVisibilities; + + public static AverageAggregator of(String field, TypeAttribute numerator, TypeAttribute divisor) { + return new AverageAggregator(field, numerator.getType().getDelegate(), divisor.getType().getDelegate(), numerator.getColumnVisibility()); + } + + public AverageAggregator(String field) { + super(field); + this.columnVisibilities = new HashSet<>(); + } + + private AverageAggregator(String field, BigDecimal numerator, BigDecimal divisor, ColumnVisibility columnVisibility) { + this(field); + this.numerator = numerator; + this.divisor = divisor; + this.average = numerator.divide(divisor, MATH_CONTEXT); + if (columnVisibility != null) { + this.columnVisibilities.add(columnVisibility); + } + } + + /** + * Returns {@link AggregateOperation#AVERAGE}. + * + * @return {@link AggregateOperation#AVERAGE} + */ + @Override + public AggregateOperation getOperation() { + return AggregateOperation.AVERAGE; + } + + @Override + public Set getColumnVisibilities() { + return Collections.unmodifiableSet(columnVisibilities); + } + + /** + * Return the average value seen for the field. + * + * @return the average value, or null if no values have been aggregated yet + */ + @Override + public BigDecimal getAggregation() { + return average; + } + + @Override + public boolean hasAggregation() { + return average != null; + } + + /** + * Return the current sum for the field values. + * + * @return the sum + */ + public BigDecimal getNumerator() { + return numerator; + } + + /** + * Return the current count for the field. 
+ * + * @return the count + */ + public BigDecimal getDivisor() { + return divisor; + } + + /** + * Adds the value into the current sum and increments the total count by one. The average will be recalculated the next time {@link #getAggregation()} is + * called. + * + * @param value + * the value to aggregate + * @throws IllegalArgumentException + * if the given value is not a {@link Numeric} type + */ + @Override + public void aggregate(Attribute value) { + BigDecimal number; + try { + number = new BigDecimal(value.getData().toString()); + } catch (Exception e) { + throw new IllegalArgumentException("Unable to calculate an average with non-numerical value '" + value.getData() + "'", e); + } + if (numerator == null) { + numerator = number; + divisor = BigDecimal.ONE; + } else { + numerator = numerator.add(number); + divisor = divisor.add(BigDecimal.ONE); + } + average = numerator.divide(divisor, MATH_CONTEXT); + columnVisibilities.add(value.getColumnVisibility()); + } + + @Override + public void merge(Aggregator other) { + if (other instanceof AverageAggregator) { + AverageAggregator aggregator = (AverageAggregator) other; + this.numerator = numerator.add(aggregator.numerator); + this.divisor = divisor.add(aggregator.divisor); + this.average = this.numerator.divide(this.divisor, MATH_CONTEXT); + this.columnVisibilities.addAll(aggregator.columnVisibilities); + } else { + throw new IllegalArgumentException("Cannot merge instance of " + other.getClass().getName()); + } + } + + @Override + public String toString() { + return new ToStringBuilder(this).append("field", field).append("average", average).append("numerator", numerator).append("divisor", divisor) + .append("columnVisibilities", columnVisibilities).toString(); + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/common/grouping/CountAggregator.java b/warehouse/query-core/src/main/java/datawave/query/common/grouping/CountAggregator.java new file mode 100644 index 0000000000..1144d1f298 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/common/grouping/CountAggregator.java @@ -0,0 +1,103 @@ +package datawave.query.common.grouping; + +import java.math.BigDecimal; +import java.util.Collections; +import java.util.HashSet; +import java.util.Set; + +import org.apache.accumulo.core.security.ColumnVisibility; +import org.apache.commons.lang.builder.ToStringBuilder; + +import datawave.query.attributes.Attribute; +import datawave.query.attributes.TypeAttribute; + +/** + * Determines the total count of aggregated field values. This supports values of all {@link Attribute} types. + */ +public class CountAggregator extends AbstractAggregator { + + /** + * The total number of times the field was seen. + */ + private long count; + + /** + * The column visibilities of all attributes aggregated. + */ + private final Set columnVisibilities; + + public static CountAggregator of(String field, TypeAttribute attribute) { + return new CountAggregator(field, attribute.getType().getDelegate().longValue(), attribute.getColumnVisibility()); + } + + public CountAggregator(String field) { + super(field); + this.columnVisibilities = new HashSet<>(); + } + + private CountAggregator(String field, long count, ColumnVisibility visibility) { + this(field); + this.count = count; + if (visibility != null) { + columnVisibilities.add(visibility); + } + } + + /** + * Returns {@link AggregateOperation#COUNT}. 
+ * + * @return {@link AggregateOperation#COUNT} + */ + @Override + public AggregateOperation getOperation() { + return AggregateOperation.COUNT; + } + + @Override + public Set getColumnVisibilities() { + return Collections.unmodifiableSet(columnVisibilities); + } + + /** + * Return the total number of times a field was seen. + * + * @return the total count + */ + @Override + public Long getAggregation() { + return count; + } + + @Override + public boolean hasAggregation() { + return count > 0L; + } + + /** + * Increments the current count by 1. + * + * @param value + * the value to aggregate + */ + @Override + public void aggregate(Attribute value) { + count++; + this.columnVisibilities.add(value.getColumnVisibility()); + } + + @Override + public void merge(Aggregator other) { + if (other instanceof CountAggregator) { + CountAggregator aggregator = (CountAggregator) other; + this.count += aggregator.count; + this.columnVisibilities.addAll(aggregator.columnVisibilities); + } else { + throw new IllegalArgumentException("Cannot merge instance of " + other.getClass().getName()); + } + } + + @Override + public String toString() { + return new ToStringBuilder(this).append("field", field).append("count", count).append("columnVisibilities", columnVisibilities).toString(); + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/common/grouping/DocumentGrouper.java b/warehouse/query-core/src/main/java/datawave/query/common/grouping/DocumentGrouper.java new file mode 100644 index 0000000000..af98c779c7 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/common/grouping/DocumentGrouper.java @@ -0,0 +1,717 @@ +package datawave.query.common.grouping; + +import static org.slf4j.LoggerFactory.getLogger; + +import java.math.BigDecimal; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.SortedSet; +import java.util.TreeSet; + +import org.apache.accumulo.core.data.Key; +import org.javatuples.Pair; +import org.slf4j.Logger; + +import com.google.common.collect.ArrayListMultimap; +import com.google.common.collect.HashMultimap; +import com.google.common.collect.Multimap; +import com.google.common.collect.Sets; + +import datawave.data.type.Type; +import datawave.query.attributes.Attribute; +import datawave.query.attributes.Document; +import datawave.query.attributes.TypeAttribute; + +/** + * This class provides the primary functionality needed to group documents and aggregate field values within identified groups (regardless if done server or + * client-side). + *
+ * Grouping
+ *
+ * Grouping fields across documents will result in distinct groupings of values for each specified group-by field, as well as the total number of times
+ * each particular grouping combination was seen. Fields to group by can be specified by the following options:
+ *
+ * Groupings may be of any size, encompassing none, some, or all of the target group fields. If a document has no entries for any of the target group fields,
+ * it will be grouped as part of an 'empty' grouping, and all target aggregation entries will be aggregated to the empty grouping. The count for 'empty' groups
+ * will be the same as the number of documents seen without any group-by fields. Values are grouped together based on the format of each document entry's key,
+ * which may have one of the following formats: {@code NAME}, {@code NAME.INSTANCE}, or {@code NAME.CONTEXT. ... .INSTANCE}.
+ *
+ * Values of fields with the same context and instance are considered direct one-to-one grouping matches, and will be placed within the same groupings. Direct
+ * matches cannot be determined for values of fields that do not have a context, and as such they will be combined with each possible grouping, effectively a
+ * Cartesian product. Direct matches are prioritized and found first before indirect matches are combined with them.
+ *
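+ * As a purely illustrative example (field names and values are hypothetical), a document containing the entries {@code AGE.FOO.1 = 20},
+ * {@code GENDER.FOO.1 = MALE}, {@code AGE.FOO.2 = 10}, and {@code GENDER.FOO.2 = FEMALE}, grouped by AGE and GENDER, would produce the two direct-match
+ * groupings {@code (20, MALE)} and {@code (10, FEMALE)}, each seen once. If BUILDING were also a group-by field, an entry such as {@code BUILDING = West}
+ * with no grouping context would be combined with both of those groupings.
+ *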
+ * Aggregation
+ *
+ * Once all valid groupings have been identified and counted, aggregation can be performed on the values of any specified fields for each grouping. The + * aggregation fields can differ from the group-by fields. The following aggregation operations are supported: + *
+ * SUM: Sum up all the values for specified fields across groupings. This operation is limited to fields with numerical values. Fields may be + * specified via: + *
+ * MAX: Find the max values for specified fields across groupings. Fields may be specified via: + * + * MIN: Find the min values for specified fields across groupings. Fields may be specified via: + * + * COUNT: Count the number of times values were seen for specified fields across groupings. Fields may be specified via: + * + * AVERAGE: Find the average of all values for specified fields across groupings. This operation is limited to fields with numerical values. + * Fields may be specified via: + * + */ +public class DocumentGrouper { + + private static final Logger log = getLogger(DocumentGrouper.class); + + public static final String GROUP_COUNT = "COUNT"; + public static final String FIELD_SUM_SUFFIX = "_SUM"; + public static final String FIELD_MAX_SUFFIX = "_MAX"; + public static final String FIELD_MIN_SUFFIX = "_MIN"; + public static final String FIELD_AVERAGE_NUMERATOR_SUFFIX = "_AVERAGE_NUMERATOR"; + public static final String FIELD_AVERAGE_DIVISOR_SUFFIX = "_AVERAGE_DIVISOR"; + public static final String FIELD_AVERAGE_SUFFIX = "_AVERAGE"; + public static final String FIELD_COUNT_SUFFIX = "_COUNT"; + + /** + * Groups and aggregates fields from the entries in the given document and merges the new group information into the given {@link Groups} instance. + * + * @param entry + * the document entry + * @param groupFields + * the fields to group and aggregate + * @param groups + * the {@link Groups} instance to merge newly found groups into + */ + public static void group(Map.Entry entry, GroupFields groupFields, Groups groups) { + DocumentGrouper documentGrouper = new DocumentGrouper(entry, groupFields, groups); + documentGrouper.group(); + } + + private final Key documentKey; + private final Document document; + private final Set groupFields; + private final Map reverseModelMappings; + private final FieldAggregator.Factory fieldAggregatorFactory; + + private final Groups groups; + private final Groups currentGroups = new Groups(); + private final FieldIndex groupFieldsIndex = new FieldIndex(false); + private final FieldIndex aggregateFieldsIndex = new FieldIndex(true); + private final Multimap,Grouping> groupingContextAndInstancesSeenForGroups = HashMultimap.create(); + private final int maxGroupSize; + + private DocumentGrouper(Map.Entry documentEntry, GroupFields groupFields, Groups groups) { + this.documentKey = documentEntry.getKey(); + this.document = documentEntry.getValue(); + this.groupFields = groupFields.getGroupByFields(); + this.fieldAggregatorFactory = groupFields.getFieldAggregatorFactory(); + this.reverseModelMappings = groupFields.getReverseModelMap(); + this.groups = groups; + this.maxGroupSize = this.groupFields.size(); + } + + /** + * Identify valid groups in the given document and aggregate relevant events to those groups. + */ + private void group() { + log.trace("apply to {} {}", documentKey, document); + // If the document contains entries that indicate grouping has already been performed, we are seeing a document that was generated by + // GroupingIterator.flatten(). No further grouping can occur. Extract the grouping information from the document and merge them into the current groups. + if (isDocumentAlreadyGrouped()) { + extractGroupsFromDocument(); + } else { // Otherwise, the document contains entries that have not yet been grouped and counted. + // Index the document entries. + indexDocumentEntries(); + // Group the document entries. 
+ groupEntries(); + // Aggregate fields only if there were aggregation fields specified and if any entries for aggregation were found. + if (fieldAggregatorFactory.hasFieldsToAggregate() && !aggregateFieldsIndex.isEmpty()) { + aggregateEntries(); + } + + // Merge the groups and aggregations we found in this particular group-by operation into the groups passed by the user. The separation is required + // to ensure that any grouping and aggregation done in this session was applied only to the current document. + this.groups.mergeAll(currentGroups); + } + } + + /** + * Return whether the document contains entries representing a flattened set of group counts generated by {@link datawave.query.iterator.GroupingIterator}. + * + * @return true if the document contains flattened group counts, or false otherwise. + */ + private boolean isDocumentAlreadyGrouped() { + return document.getDictionary().keySet().stream().anyMatch(key -> key.startsWith(GROUP_COUNT)); + } + + /** + * Extract grouping information from the current document and add them to the current groups. Each field will be remapped if a reverse-model mapping was + * supplied. + */ + @SuppressWarnings("unchecked") + private void extractGroupsFromDocument() { + // Parse a field from each entry and store them in instanceToFields. The id indicates which grouping, count, and aggregated values go together. + Multimap idToFields = HashMultimap.create(); + for (Map.Entry>> entry : document.entrySet()) { + Field field = parseField(entry); + idToFields.put(field.getInstance(), field); + } + // For each distinct grouping, parse and write the grouping information to the current groups. + for (String instance : idToFields.keySet()) { + // The distinct grouping. + Grouping grouping = new Grouping(); + // The aggregated values. + FieldAggregator fieldAggregator = new FieldAggregator(); + // The total times the grouping was seen. + int count = 0; + for (Field field : idToFields.get(instance)) { + // We found the group count. + if (field.getBase().equals(GROUP_COUNT)) { + TypeAttribute attribute = (TypeAttribute) field.getAttribute(); + count = attribute.getType().getDelegate().intValue(); + // We found the sum of an aggregated field. + } else if (field.getBase().endsWith(FIELD_SUM_SUFFIX)) { + TypeAttribute attribute = (TypeAttribute) field.getAttribute(); + String fieldName = removeSuffix(field.getBase(), FIELD_SUM_SUFFIX); + fieldAggregator.mergeAggregator(SumAggregator.of(fieldName, attribute)); + // We found the numerator of the average of an aggregated field. + } else if (field.getBase().endsWith(FIELD_AVERAGE_NUMERATOR_SUFFIX)) { + String unmappedFieldName = removeSuffix(field.getBase(), FIELD_AVERAGE_NUMERATOR_SUFFIX); + String fieldName = removeSuffix(field.getBase(), FIELD_AVERAGE_NUMERATOR_SUFFIX); + // It's possible that the divisor will be stored under a previously unmapped field name. For example, the field ETA from + // ETA_AVERAGE_NUMERATOR.1 could be mapped to AG here. Use the original field name (e.g. ETA) to ensure we find the + // corresponding divisor (e.g. ETA_AVERAGE_DIVISOR.1) for the numerator. + String divisorField = unmappedFieldName + FIELD_AVERAGE_DIVISOR_SUFFIX + "." + field.getInstance(); + TypeAttribute divisorAttribute = (TypeAttribute) document.get(divisorField); + TypeAttribute numeratorAttribute = (TypeAttribute) field.getAttribute(); + fieldAggregator.mergeAggregator(AverageAggregator.of(fieldName, numeratorAttribute, divisorAttribute)); + // We found the count of an aggregated field. 
+ } else if (field.getBase().endsWith(FIELD_COUNT_SUFFIX)) { + TypeAttribute attribute = (TypeAttribute) field.getAttribute(); + String fieldName = removeSuffix(field.getBase(), FIELD_COUNT_SUFFIX); + fieldAggregator.mergeAggregator(CountAggregator.of(fieldName, attribute)); + // We found the min of an aggregated field. + } else if (field.getBase().endsWith(FIELD_MIN_SUFFIX)) { + String fieldName = removeSuffix(field.getBase(), FIELD_MIN_SUFFIX); + fieldAggregator.mergeAggregator(MinAggregator.of(fieldName, field.getAttribute())); + // We found the max of an aggregated field. + } else if (field.getBase().endsWith(FIELD_MAX_SUFFIX)) { + String fieldName = removeSuffix(field.getBase(), FIELD_MAX_SUFFIX); + fieldAggregator.mergeAggregator(MaxAggregator.of(fieldName, field.getAttribute())); + // We found a field that is part of the grouping. + } else if (!field.getBase().endsWith(FIELD_AVERAGE_DIVISOR_SUFFIX)) { + Attribute attribute = field.getAttribute(); + GroupingAttribute newAttribute = new GroupingAttribute<>((Type) attribute.getData(), new Key(field.getBase()), true); + newAttribute.setColumnVisibility(attribute.getColumnVisibility()); + grouping.add(newAttribute); + } + } + // Create a new group and merge it into the existing groups. + Group group = new Group(grouping, count); + group.setFieldAggregator(fieldAggregator); + group.addDocumentVisibility(document.getColumnVisibility()); + groups.mergeOrPutGroup(group); + } + } + + /** + * Return a substring of the given str without the given suffix. + * + * @param str + * the string + * @param suffix + * the suffix + * @return the string without the suffix + */ + private String removeSuffix(String str, String suffix) { + int suffixLength = suffix.length(); + return str.substring(0, str.length() - suffixLength); + } + + /** + * Identify which events in the document are targets for grouping and/or aggregation, and index them. + */ + private void indexDocumentEntries() { + for (Map.Entry> entry : document.entrySet()) { + Field field = parseField(entry); + // The current field is a target for grouping. + if (groupFields.contains(field.getBase())) { + groupFieldsIndex.index(field); + } + // The current field is a target for aggregation. + if (fieldAggregatorFactory.isFieldToAggregate(field.getBase())) { + aggregateFieldsIndex.index(field); + } + } + } + + /** + * Identify valid groupings consisting of target group pairs and create/update their corresponding {@link Group} in {@link #currentGroups}. + */ + private void groupEntries() { + // If we found any entries for target group fields, identify all valid groupings. + if (groupEntriesFound()) { + // The groupings combinations that we find. Each combination may only have one Field from a particular target group field, e.g. if doing + // #GROUP_BY(AGE,GENDER), a combination set will have at most one AGE field and one GENDER field. + List> groupings = new ArrayList<>(); + + // If we only have one target grouping field, we do not need to find any group combinations. All events for the given target group field should be + // tracked as individual groupings. + if (maxGroupSize == 1) { + groupFieldsIndex.fields.values().stream().map(Collections::singleton).forEach(groupings::add); + } else { + // If we have any group field events with grouping contexts and instances, e.g. GENDER.FOO.1, it's possible that we will find direct matches to + // other group field events with the same grouping context and instance (a direct match). These should be found first for efficiency purposes. 
+ if (groupFieldsIndex.hasFieldsWithPossibleDirectMatch()) { + groupings = getGroupingsWithDirectMatches(); + } + // If we have any group field events that do not have a grouping context and instance, e.g. GENDER.1 or GENDER, then each one of those events + // should + // be combined with each existing group combination, effectively creating cartesian products. + if (groupFieldsIndex.hasFieldsWithoutDirectMatch()) { + groupings = getGroupingsWithoutDirectMatches(groupings); + } + } + + // Track each identified grouping. + groupings.forEach(this::trackGroup); + } else { + // If no entries were found for any of the target group fields, create a single 'empty' group that will represent this document in the final + // grouping results. + trackGroup(Grouping.emptyGrouping()); + } + } + + /** + * Identify grouping combinations that are direct matches to each other based on the grouping context and instance of the field events. If we do not find + * any direct match at all for a specified target group field, then all events for the group field will be combined. + * + * @return the direct match combinations + */ + private List> getGroupingsWithDirectMatches() { + List> groupings = new ArrayList<>(); + Set fieldsWithGroupingContextAndInstance = groupFieldsIndex.getFieldsWithPossibleDirectMatch(); + // If we only saw one field with a grouping context and instance, return a list of singletons with each field event. We cannot create any combinations + // at this time. + if (fieldsWithGroupingContextAndInstance.size() == 1) { + Collection fields = groupFieldsIndex.getFields(fieldsWithGroupingContextAndInstance.iterator().next()); + fields.stream().map(Collections::singleton).forEach(groupings::add); + } else { + // If we have more than one target field with a grouping context and instance, determine the correct groupings based off matching the grouping + // context and instance where possible with direct 1-to-1 matches, i.e. AGE.FOO.1 is a direct match to GENDER.FOO.1. + Multimap,Field> groupingContextAndInstanceToField = HashMultimap.create(); + for (String fieldName : fieldsWithGroupingContextAndInstance) { + Collection fields = groupFieldsIndex.getFields(fieldName); + for (Field field : fields) { + groupingContextAndInstanceToField.put(Pair.with(field.getGroupingContext(), field.getInstance()), field); + } + } + + // Sort the entries by the number of direct matches seen for each grouping context-instance pair. + SortedSet,Collection>> directMatchesSortedByPrevalence = new TreeSet<>( + Comparator.comparingInt((Map.Entry,Collection> left) -> left.getValue().size()).reversed() + .thenComparing(Map.Entry::getKey)); + directMatchesSortedByPrevalence.addAll(groupingContextAndInstanceToField.asMap().entrySet()); + + // Map of group target field names to the grouping combinations found for them. + Multimap,Set> fieldsToGroupings = ArrayListMultimap.create(); + // Tracks the largest size seen for any combination of direct matches for target group fields. + Map fieldToLargestGroupingSize = new HashMap<>(); + + for (Map.Entry,Collection> entry : directMatchesSortedByPrevalence) { + Collection fields = entry.getValue(); + SortedSet groupingFields = new TreeSet<>(); + boolean keep = false; + for (Field field : fields) { + groupingFields.add(field.getBase()); + // If we have seen this field before associated with another grouping context and instance, only keep this grouping if it is the same size + // as the largest grouping we've seen for the field. 
+ if (fieldToLargestGroupingSize.containsKey(field.getBase())) { + if (fields.size() == fieldToLargestGroupingSize.get(field.getBase())) { + keep = true; + } + } else { + // If this is the first time we are seeing this field, then we have found the largest batch size for the grouping that this field is in. + // Automatically keep this grouping. + fieldToLargestGroupingSize.put(field.getBase(), fields.size()); + keep = true; + } + } + if (keep) { + fieldsToGroupings.put(groupingFields, Sets.newHashSet(fields)); + } + } + + // Now that we've found the largest direct match combinations for each target group field, we need to effectively create cartesian products between + // each combination. For instance, given the following grouping combinations resulting from #GROUP_BY(AGE,GENDER,RECORD_ID,RECORD_TEXT,BUILDING): + // + // {AGE,GENDER} => [{"20", "MALE"},{"10", "FEMALE"}] + // {RECORD_ID,RECORD_TEXT} => [{"123", "Summary"}] + // {BUILDING} => [{West},{East}] + // + // We want to generate the following combinations: + // {"20","MALE","123","Summary","West"} + // {"20","MALE","123","Summary","East"} + // {"10","FEMALE","123","Summary","West"} + // {"10","FEMALE","123","Summary","East"} + for (SortedSet fields : fieldsToGroupings.keySet()) { + Collection> currentGroupings = fieldsToGroupings.get(fields); + if (groupings.isEmpty()) { + groupings.addAll(currentGroupings); + } else { + List> newGroupings = new ArrayList<>(); + for (Set oldGrouping : groupings) { + for (Set currentGrouping : currentGroupings) { + Set newGrouping = new HashSet<>(oldGrouping); + newGrouping.addAll(currentGrouping); + newGroupings.add(newGrouping); + } + } + groupings = newGroupings; + } + } + } + return groupings; + } + + /** + * Combine each field event for target group fields that do not have both a grouping context and instance to any previously found grouping combinations. + * + * @param prevGroupings + * the combinations that have been found thus far + * @return the updated grouping combinations + */ + private List> getGroupingsWithoutDirectMatches(List> prevGroupings) { + List> groupings = new ArrayList<>(prevGroupings); + for (String fieldName : groupFieldsIndex.getFieldsWithoutDirectMatch()) { + Collection fields = groupFieldsIndex.getFields(fieldName); + // If there are no previous grouping combinations, add each field event as a singular combination. + if (groupings.isEmpty()) { + for (Field field : fields) { + groupings.add(Sets.newHashSet(field)); + } + } else { + // Effectively create cartesian products of each previously seen grouping combination and each field event for the current target event field. + // For instance, if we have the previous combination [{"20","MALE"},{"10","FEMALE"}] and the field events {"A","B","C"}, we want to generate + // the following combinations: + // + // {"20","MALE", "A"} + // {"20","MALE", "B"} + // {"20","MALE", "C"} + // {"10","FEMALE", "A"} + // {"10","FEMALE", "B"} + // {"10","FEMALE", "C"} + List> newGroupings = new ArrayList<>(); + for (Set oldGrouping : groupings) { + for (Field field : fields) { + Set newGrouping = new HashSet<>(oldGrouping); + newGrouping.add(field); + newGroupings.add(newGrouping); + } + } + groupings = newGroupings; + } + } + return groupings; + } + + /** + * Track the groups identified by the given field event combinations. + * + * @param groupedFields + * the group combination + */ + private void trackGroup(Collection groupedFields) { + // The grouping context-instance pairs seen for all grouping keys generated in this method. 
+ Set> groupingContextAndInstances = new HashSet<>(); + // The set of 'keys' that are used to identify individual distinct groupings. + List groupings = new ArrayList<>(); + // It is possible for a field event in a grouping combination to have a multi-value attribute. If this occurs, we must once again create cartesian + // products between all the values of the attribute of each field. + for (Field field : groupedFields) { + // Track the grouping context-instance pair. This is required for us to be able to find direct matches later when aggregating. + if (field.hasGroupingContext() && field.hasInstance()) { + groupingContextAndInstances.add(Pair.with(field.getGroupingContext(), field.getInstance())); + } + // If we have no grouping keys yet, create keys consisting of each value of the current field. + if (groupings.isEmpty()) { + for (Attribute attribute : field.getAttributes()) { + GroupingAttribute copy = createCopyWithKey(attribute, field.getBase()); + groupings.add(new Grouping(copy)); + } + } else { + // Otherwise, create the cartesian product between the current field's value and each existing key. + List newGroupings = new ArrayList<>(); + for (Attribute attribute : field.getAttributes()) { + GroupingAttribute copy = createCopyWithKey(attribute, field.getBase()); + for (Grouping grouping : groupings) { + Grouping groupingCopy = new Grouping(grouping); + groupingCopy.add(copy); + newGroupings.add(groupingCopy); + } + } + groupings = newGroupings; + } + } + + // Track which grouping context-instance pairs we have seen for each grouping key. + for (Pair groupingContextAndInstance : groupingContextAndInstances) { + this.groupingContextAndInstancesSeenForGroups.putAll(groupingContextAndInstance, groupings); + } + + // Now we can create/update groups in currentGroups for each grouping key. + groupings.forEach(this::trackGroup); + } + + /** + * Create/update the group for the given grouping. + * + * @param grouping + * the grouping to track + */ + private void trackGroup(Grouping grouping) { + // Get the group. + Group group = currentGroups.getGroup(grouping); + // Create a group for the grouping if one does not already exist. + if (group == null) { + group = new Group(grouping); + group.setFieldAggregator(fieldAggregatorFactory.newInstance()); + currentGroups.putGroup(group); + } + // Add the visibilities of each attribute in the grouping for combination later, and increment the count for how many times this distinct + // grouping was seen. + group.addAttributeVisibilities(grouping); + group.incrementCount(); + group.addDocumentVisibility(document.getColumnVisibility()); + } + + private GroupingAttribute createCopyWithKey(Attribute attribute, String key) { + Type type = ((TypeAttribute) attribute).getType(); + GroupingAttribute newAttribute = new GroupingAttribute<>(type, new Key(key), true); + newAttribute.setColumnVisibility(attribute.getColumnVisibility()); + return newAttribute; + } + + /** + * Aggregate all qualifying events that are from target aggregation fields. + */ + private void aggregateEntries() { + // Groupings were found in the document. Aggregate entries according to their association based on each entry's grouping context and instance. + if (groupEntriesFound()) { + // If we have any target events for aggregation that have a grouping context and instance, e.g. AGE.FOO.1, attempt to find groups that have matching + // grouping context and instance pairs, and aggregate the events into those groups only. 
If we do not find any direct match at all for a specified + // aggregation field, then all events for the aggregation field will be aggregated into each group. + if (aggregateFieldsIndex.hasFieldsWithPossibleDirectMatch()) { + // Attempt to find a direct match for the current aggregation target field. + for (String fieldName : aggregateFieldsIndex.fieldToFieldsByGroupingContextAndInstance.keySet()) { + Multimap,Field> groupingContextAndInstanceToFields = aggregateFieldsIndex.fieldToFieldsByGroupingContextAndInstance + .get(fieldName); + Set> aggregatePairs = groupingContextAndInstanceToFields.keySet(); + Set> groupPairs = this.groupingContextAndInstancesSeenForGroups.keySet(); + // A group and an aggregation event is considered to be a direct match if and only if the group contains any event that has the same + // grouping context and instance as the aggregation event. + Set> directMatches = Sets.intersection(aggregatePairs, groupPairs); + // If we have any direct matches, then only aggregate the direct matches into the groups where we saw a direct match. + if (!directMatches.isEmpty()) { + for (Pair directMatch : directMatches) { + for (Grouping grouping : this.groupingContextAndInstancesSeenForGroups.get(directMatch)) { + Group group = currentGroups.getGroup(grouping); + Collection fields = groupingContextAndInstanceToFields.get(directMatch); + group.aggregateAll(fields); + } + } + } else { + // Otherwise, aggregate all events for this field into all groups. + Collection fields = aggregateFieldsIndex.getFields(fieldName); + currentGroups.aggregateToAllGroups(fields); + } + } + } + // If there are any target aggregation events that do not have a grouping context, e.g. AGE or AGE.1, then all target aggregation events should be + // aggregated into all groups. + if (aggregateFieldsIndex.hasFieldsWithoutDirectMatch()) { + for (String fieldName : aggregateFieldsIndex.fieldsWithoutDirectMatch) { + Collection fields = aggregateFieldsIndex.getFields(fieldName); + currentGroups.aggregateToAllGroups(fields); + } + } + } else { + // No groupings were found in the document. In this case, we will consider this document to contain a placeholder 'empty' grouping, and aggregate + // all aggregation entries to the empty grouping. + Group group = currentGroups.getGroup(Grouping.emptyGrouping()); + // Aggregate all aggregate entries to the grouping. + Multimap fields = aggregateFieldsIndex.fields; + for (String field : fields.keySet()) { + group.aggregateAll(field, fields.get(field)); + } + } + } + + private boolean groupEntriesFound() { + return !groupFieldsIndex.isEmpty(); + } + + /** + * Parses the relevant information from the given entry and returns a {@link Field} that contains the field name, group, instance, and the value. It is + * assumed that the entry's key will have the format {@code }, {@code .} or {@code ....}. + * + * @param entry + * the document entry + * @return the field entry. + */ + private Field parseField(Map.Entry> entry) { + String field = entry.getKey(); + String name = field; + String groupingContext = null; + String instance = null; + + int firstPeriod = field.indexOf('.'); + // If the field name contains at least one period, the field's format is either . or .... + if (firstPeriod != -1) { + // The field name is everything before the first period. + name = field.substring(0, firstPeriod); + + int secondPeriod = field.indexOf(".", firstPeriod + 1); + // If a second period is present, we know that field's format is .... 
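+            // e.g. a key like "GENDER.FOO.1" yields the name "GENDER", the grouping context "FOO", and the instance "1"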
+ if (secondPeriod != -1) { + // Parse the group from the substring directly following the name. + groupingContext = field.substring(firstPeriod + 1, secondPeriod); + // Parse the instance from the substring after the last period. + instance = field.substring(field.lastIndexOf(".") + 1); + } else { + // If there is no second period present, the field's format is .. + instance = field.substring(firstPeriod + 1); + } + } + + // Map the field name to the root model name. This ensures that even if we're grouping fields that can be seen with different model names, e.g. AG, ETA, + // and AGE, that the same root name will be used across the board to ensure that they're treated as from the same target group/aggregation field. + name = getMappedFieldName(name); + + return new Field(name, groupingContext, instance, entry.getValue()); + } + + /** + * Get the corresponding model mapping for the field. If model mappings have not been provided, the original field will be returned. + * + * @param field + * the field to map + * @return the mapped field + */ + private String getMappedFieldName(String field) { + return reverseModelMappings.getOrDefault(field, field); + } + + /** + * This class maintains useful indexes that will be used for determining direct and non-direct matches when grouping and aggregating. + */ + private static class FieldIndex { + + // Map of field names to their entries. + private final Multimap fields = ArrayListMultimap.create(); + // The set of fields with possible direct matches. + private final Set fieldsWithPossibleDirectMatch = new HashSet<>(); + // The set of fields with no direct matches. + private final Set fieldsWithoutDirectMatch = new HashSet<>(); + // Map of field names to Multimaps of grouping contexts to entries. + private final Map,Field>> fieldToFieldsByGroupingContextAndInstance = new HashMap<>(); + // Whether to accept entries that have null attributes for indexing. + private final boolean allowNullAttributes; + + private FieldIndex(boolean allowNullAttributes) { + this.allowNullAttributes = allowNullAttributes; + } + + /** + * Index the given {@link Field}. If {@link #allowNullAttributes} is set to false and the given field has a null attribute, it will not be indexed. + * + * @param field + * the field to index + */ + public void index(Field field) { + // Check if we can index this field. + if (field.getAttribute() != null || allowNullAttributes) { + fields.put(field.getBase(), field); + // If the field has a grouping context and instance, it's possible that it may have a direct match. Index the field and its grouping + // context-instance pair. + if (field.hasGroupingContext() && field.hasInstance()) { + fieldsWithPossibleDirectMatch.add(field.getBase()); + Multimap,Field> groupingContextAndInstanceToField = fieldToFieldsByGroupingContextAndInstance.get(field.getBase()); + if (groupingContextAndInstanceToField == null) { + groupingContextAndInstanceToField = HashMultimap.create(); + fieldToFieldsByGroupingContextAndInstance.put(field.getBase(), groupingContextAndInstanceToField); + } + groupingContextAndInstanceToField.put(Pair.with(field.getGroupingContext(), field.getInstance()), field); + } else { + // Otherwise, the field will have no direct matches. 
+ fieldsWithoutDirectMatch.add(field.getBase()); + } + } + } + + public Multimap getFields() { + return fields; + } + + public Collection getFields(String field) { + return fields.get(field); + } + + public Set getFieldsWithPossibleDirectMatch() { + return fieldsWithPossibleDirectMatch; + } + + public boolean hasFieldsWithPossibleDirectMatch() { + return !fieldsWithPossibleDirectMatch.isEmpty(); + } + + public boolean hasFieldsWithoutDirectMatch() { + return !fieldsWithoutDirectMatch.isEmpty(); + } + + public Set getFieldsWithoutDirectMatch() { + return fieldsWithoutDirectMatch; + } + + public boolean isEmpty() { + return fields.isEmpty(); + } + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/common/grouping/Field.java b/warehouse/query-core/src/main/java/datawave/query/common/grouping/Field.java new file mode 100644 index 0000000000..7af6af2c16 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/common/grouping/Field.java @@ -0,0 +1,124 @@ +package datawave.query.common.grouping; + +import java.util.Collections; +import java.util.Objects; +import java.util.Set; + +import org.apache.commons.lang.builder.ToStringBuilder; + +import datawave.query.attributes.Attribute; +import datawave.query.attributes.Attributes; + +/** + * Represents an entry from a document with a field name broken down into its name, group, and instance, and the entry's attribute. + */ +class Field { + + private final String base; + private final String groupingContext; + private final String instance; + private final Attribute attribute; + private final Set> attributes; + + public Field(String base, String groupingContext, String instance, Attribute attribute) { + this.base = base; + this.groupingContext = groupingContext; + this.instance = instance; + this.attribute = attribute; + + if (attribute instanceof Attributes) { + this.attributes = ((Attributes) attribute).getAttributes(); + } else { + this.attributes = Collections.singleton(attribute); + } + } + + /** + * Return the field base. + * + * @return the field base + */ + public String getBase() { + return base; + } + + /** + * Return whether this field has a grouping context as part of its name. + * + * @return true if this field has a group, or false otherwise + */ + public boolean hasGroupingContext() { + return groupingContext != null; + } + + /** + * Return the field's group, or null if the field does not have a group. + * + * @return the group + */ + public String getGroupingContext() { + return groupingContext; + } + + /** + * Return the field's instance, or null if the field does not have an instance. + * + * @return the instance + */ + public String getInstance() { + return instance; + } + + /** + * Return whether this field has an instance as part of its name. + * + * @return true if this field has an instance, or false otherwise + */ + public boolean hasInstance() { + return instance != null; + } + + /** + * Return this field's attribute + * + * @return the attribute + */ + public Attribute getAttribute() { + return attribute; + } + + /** + * A convenience method for retrieving all attributes for this {@link Field}, particularly useful when dealing with a {@link Field} that was created with a + * multi-value attribute. If the originating attribute was not multi-value, then the set will consist only of the same attribute returned by + * {@link #getAttribute()}. 
+ * + * @return all attributes, or same attribute as returned by {@link #getAttribute()} if the originating attribute was not multi-value + */ + public Set> getAttributes() { + return attributes; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + Field field = (Field) o; + return Objects.equals(base, field.base) && Objects.equals(groupingContext, field.groupingContext) && Objects.equals(instance, field.instance) + && Objects.equals(attributes, field.attributes); + } + + @Override + public int hashCode() { + return Objects.hash(base, groupingContext, instance, attributes); + } + + @Override + public String toString() { + return new ToStringBuilder(this).append("base", base).append("groupingContext", groupingContext).append("instance", instance) + .append("attributes", attributes).toString(); + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/common/grouping/FieldAggregator.java b/warehouse/query-core/src/main/java/datawave/query/common/grouping/FieldAggregator.java new file mode 100644 index 0000000000..304dda525b --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/common/grouping/FieldAggregator.java @@ -0,0 +1,404 @@ +package datawave.query.common.grouping; + +import java.util.Arrays; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.function.Function; +import java.util.stream.Collectors; + +import org.apache.commons.lang.builder.ToStringBuilder; + +import datawave.query.attributes.Attribute; + +/** + * This class provides functionality to aggregate values for specified target fields using specified aggregation operations. + */ +public class FieldAggregator { + + private final Map>> aggregatorMap; + + public FieldAggregator() { + aggregatorMap = new HashMap<>(); + } + + public FieldAggregator(Set sumFields, Set maxFields, Set minFields, Set countFields, Set averageFields) { + this(); + populateAggregators(sumFields, SumAggregator::new); + populateAggregators(maxFields, MaxAggregator::new); + populateAggregators(minFields, MinAggregator::new); + populateAggregators(countFields, CountAggregator::new); + populateAggregators(averageFields, AverageAggregator::new); + } + + /** + * Add an aggregator supplied by the given constructor for each of the given fields to the aggregator map. + * + * @param fields + * the fields + * @param constructor + * the aggregator constructor + */ + private void populateAggregators(Set fields, Function> constructor) { + if (fields != null) { + for (String field : fields) { + Aggregator aggregator = constructor.apply(field); + Map> map = aggregatorMap.get(field); + if (map == null) { + map = new HashMap<>(); + this.aggregatorMap.put(field, map); + } + map.put(aggregator.getOperation(), aggregator); + } + } + } + + /** + * Aggregate the given field to all relevant aggregators. + * + * @param field + * the field to aggregate + */ + public void aggregate(Field field) { + if (aggregatorMap.containsKey(field.getBase())) { + Collection> aggregators = this.aggregatorMap.get(field.getBase()).values(); + for (Attribute attribute : field.getAttributes()) { + aggregators.forEach(aggregator -> aggregator.aggregate(attribute)); + } + } + } + + /** + * Aggregate each of the given fields to all relevant aggregators. 
+ * + * @param fields + * the fields to aggregate + */ + public void aggregateAll(Collection fields) { + fields.forEach(this::aggregate); + } + + /** + * Aggregate each of the given fields to all relevant aggregators for the given field. This is more efficient than {@link #aggregateAll(Collection)} when + * you have a collection of fields for the same base field. + * + * @param field + * the field the base field name + * @param fields + * the fields to aggregate + */ + public void aggregateAll(String field, Collection fields) { + if (aggregatorMap.containsKey(field)) { + List> attributes = fields.stream().map(Field::getAttribute).collect(Collectors.toList()); + Collection> aggregators = this.aggregatorMap.get(field).values(); + for (Aggregator aggregator : aggregators) { + aggregator.aggregateAll(attributes); + } + } + } + + /** + * Return the map of fields to their aggregators. + * + * @return the aggregator map. + */ + public Map>> getAggregatorMap() { + return aggregatorMap; + } + + public Aggregator getAggregator(String field, AggregateOperation operation) { + Map> map = aggregatorMap.get(field); + if (map != null) { + return map.get(operation); + } + return null; + } + + /** + * Return the set of all fields being aggregated. + * + * @return the fields + */ + public Collection getFieldsToAggregate() { + return aggregatorMap.keySet(); + } + + /** + * Merge the given aggregator into this aggregated fields. + * + * @param aggregator + * the aggregator to merge. + */ + public void mergeAggregator(Aggregator aggregator) { + if (aggregator.hasAggregation()) { + Map> map = aggregatorMap.computeIfAbsent(aggregator.getField(), k -> new HashMap<>()); + if (map.containsKey(aggregator.getOperation())) { + Aggregator currentAggregator = map.get(aggregator.getOperation()); + if (currentAggregator.hasAggregation()) { + currentAggregator.merge(aggregator); + } else { + map.put(aggregator.getOperation(), aggregator); + } + } else { + map.put(aggregator.getOperation(), aggregator); + } + } + + } + + /** + * Merge the given aggregated fields into this aggregated fields. + * + * @param other + * the aggregated fields to merge in + */ + public void merge(FieldAggregator other) { + for (String field : other.aggregatorMap.keySet()) { + // If we already have aggregators for this field, merge the aggregators for the current field from the other aggregated fields into this one. + if (this.aggregatorMap.containsKey(field)) { + Map> thisMap = this.aggregatorMap.get(field); + Map> otherMap = other.aggregatorMap.get(field); + for (AggregateOperation operation : otherMap.keySet()) { + if (thisMap.containsKey(operation)) { + Aggregator currentAggregator = thisMap.get(operation); + Aggregator otherAggregator = otherMap.get(operation); + if (currentAggregator.hasAggregation() && otherAggregator.hasAggregation()) { + currentAggregator.merge(otherAggregator); + } else if (otherAggregator.hasAggregation()) { + thisMap.put(operation, otherAggregator); + } + } else { + thisMap.put(operation, otherMap.get(operation)); + } + } + } else { + // If no aggregators exist in this aggregated fields for the current field, add all aggregators for it. + this.aggregatorMap.put(field, new HashMap<>(other.aggregatorMap.get(field))); + } + } + } + + @Override + public String toString() { + return aggregatorMap.toString(); + } + + /** + * A factory that will generate new {@link FieldAggregator} with the designated sum, max, min, count, and average aggregation field targets. 
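+ * <p>
+ * A minimal illustrative sketch (the field names are hypothetical):
+ *
+ * <pre>{@code
+ * FieldAggregator aggregator = new FieldAggregator.Factory()
+ *                 .withSumFields("BALANCE")
+ *                 .withAverageFields("AGE")
+ *                 .newInstance();
+ * }</pre>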
+ */ + public static class Factory { + + private final Set sumFields; + private final Set maxFields; + private final Set minFields; + private final Set countFields; + private final Set averageFields; + private final Set allFields; + + public Factory() { + this.sumFields = new HashSet<>(); + this.maxFields = new HashSet<>(); + this.minFields = new HashSet<>(); + this.countFields = new HashSet<>(); + this.averageFields = new HashSet<>(); + this.allFields = new HashSet<>(); + } + + /** + * Set the fields for which to find the aggregated sum. + * + * @param fields + * the fields + * @return this factory + */ + public Factory withSumFields(Set fields) { + addFields(this.sumFields, fields); + return this; + } + + /** + * Set the fields for which to find the aggregated sum. + * + * @param fields + * the fields + * @return this factory + */ + public Factory withSumFields(String... fields) { + addFields(this.sumFields, fields); + return this; + } + + /** + * Set the fields for which to find the aggregated max. + * + * @param fields + * the fields + * @return this factory + */ + public Factory withMaxFields(Set fields) { + addFields(this.maxFields, fields); + return this; + } + + /** + * Set the fields for which to find the aggregated max. + * + * @param fields + * the fields + * @return this factory + */ + public Factory withMaxFields(String... fields) { + addFields(this.maxFields, fields); + return this; + } + + /** + * Set the fields for which to find the aggregated min. + * + * @param fields + * the fields + * @return this factory + */ + public Factory withMinFields(Set fields) { + addFields(this.minFields, fields); + return this; + } + + /** + * Set the fields for which to find the aggregated min. + * + * @param fields + * the fields + * @return this factory + */ + public Factory withMinFields(String... fields) { + addFields(this.minFields, fields); + return this; + } + + /** + * Set the fields for which to find the total number of times seen. + * + * @param fields + * the fields + * @return this factory + */ + public Factory withCountFields(Set fields) { + addFields(this.countFields, fields); + return this; + } + + /** + * Set the fields for which to find the aggregated count. + * + * @param fields + * the fields + * @return this factory + */ + public Factory withCountFields(String... fields) { + addFields(this.countFields, fields); + return this; + } + + /** + * Set the fields for which to find the aggregated average. + * + * @param fields + * the fields + * @return this factory + */ + public Factory withAverageFields(Set fields) { + addFields(this.averageFields, fields); + return this; + } + + /** + * Set the fields for which to find the aggregated average. + * + * @param fields + * the fields + * @return this factory + */ + public Factory withAverageFields(String... fields) { + addFields(this.averageFields, fields); + return this; + } + + /** + * Add the given fields into the given set. + * + * @param set + * the set to add the fields to + * @param fields + * the fields to add + */ + private void addFields(Set set, Collection fields) { + if (fields != null) { + set.addAll(fields); + allFields.addAll(fields); + } + } + + private void addFields(Set set, String... fields) { + addFields(set, Arrays.asList(fields)); + } + + /** + * Return a new {@link FieldAggregator} with the configured target aggregation fields. + * + * @return a new {@link FieldAggregator} instance + */ + public FieldAggregator newInstance() { + return hasFieldsToAggregate() ? 
new FieldAggregator(sumFields, maxFields, minFields, countFields, averageFields) : new FieldAggregator(); + } + + /** + * Return whether this factory has any target aggregation fields set. + * + * @return true if this factory has any target aggregation fields, or false otherwise + */ + public boolean hasFieldsToAggregate() { + return !allFields.isEmpty(); + } + + /** + * Return whether the given field matches a target aggregation field in this factory. + * + * @param field + * the field + * @return true if the given field is a target for aggregation, or false otherwise + */ + public boolean isFieldToAggregate(String field) { + return allFields.contains(field); + } + + @Override + public String toString() { + return new ToStringBuilder(this).append("sumFields", sumFields).append("maxFields", maxFields).append("minFields", minFields) + .append("countFields", countFields).append("averageFields", averageFields).append("allFields", allFields).toString(); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + Factory factory = (Factory) o; + return Objects.equals(sumFields, factory.sumFields) && Objects.equals(maxFields, factory.maxFields) && Objects.equals(minFields, factory.minFields) + && Objects.equals(countFields, factory.countFields) && Objects.equals(averageFields, factory.averageFields) + && Objects.equals(allFields, factory.allFields); + } + + @Override + public int hashCode() { + return Objects.hash(sumFields, maxFields, minFields, countFields, averageFields, allFields); + } + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/common/grouping/Group.java b/warehouse/query-core/src/main/java/datawave/query/common/grouping/Group.java new file mode 100644 index 0000000000..dc40eee9bd --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/common/grouping/Group.java @@ -0,0 +1,168 @@ +package datawave.query.common.grouping; + +import java.util.Collection; +import java.util.HashSet; +import java.util.Set; + +import org.apache.accumulo.core.security.ColumnVisibility; +import org.apache.commons.lang.builder.ToStringBuilder; + +import com.google.common.collect.HashMultimap; +import com.google.common.collect.Multimap; + +/** + * Represents a grouping of values for fields specified via the #GROUP_BY functionality, with information about the total number of times the grouping was seen, + * values for target aggregation fields that were matched to this group, and the different column visibilities seen. + */ +public class Group { + + /** + * The distinct set of values that represent this grouping. + */ + private final ImmutableGrouping grouping; + + /** + * The different column visibilities seen for each attribute that makes up the grouping. + */ + private final Multimap,ColumnVisibility> attributeVisibilities = HashMultimap.create(); + + /** + * The column visibilities for each document that contributed entries to this grouping. + */ + private final Set documentVisibilities = new HashSet<>(); + + /** + * The total number of times the distinct grouping was seen. + */ + private int count; + + /** + * The aggregated values for any specified fields to aggregate. 
+ */ + private FieldAggregator fieldAggregator = new FieldAggregator(); + + public Group(Grouping grouping) { + this(grouping, 0); + } + + public Group(Grouping grouping, int count) { + this.grouping = new ImmutableGrouping(grouping); + addAttributeVisibilities(this.grouping); + this.count = count; + } + + /** + * Returns the distinct set of values that represent this grouping. + * + * @return the grouping + */ + public Grouping getGrouping() { + return grouping; + } + + /** + * Add the column visibilities from each of the given attributes to the set of attribute visibilities for this group. + * + * @param grouping + * the attributes to add visibilities from + */ + public void addAttributeVisibilities(Grouping grouping) { + for (GroupingAttribute attribute : grouping) { + attributeVisibilities.put(attribute, attribute.getColumnVisibility()); + } + } + + /** + * Return the set of column visibilities seen for the given attribute. + * + * @param attribute + * the attribute + * @return the column visibilities seen for the given attributes + */ + public Collection getVisibilitiesForAttribute(GroupingAttribute attribute) { + return attributeVisibilities.get(attribute); + } + + /** + * Add the column visibility to the set of visibilities of documents for which we have seen the grouping of this group in. + * + * @param columnVisibility + * the visibility to add + */ + public void addDocumentVisibility(ColumnVisibility columnVisibility) { + this.documentVisibilities.add(columnVisibility); + } + + /** + * Return the set of all distinct column visibilities from documents that we have seen this group in. + * + * @return the document column visibilities + */ + public Set getDocumentVisibilities() { + return documentVisibilities; + } + + /** + * Increment the number of times we have seen this grouping by one. + */ + public void incrementCount() { + this.count++; + } + + /** + * Returns the number of times we have seen this grouping. + * + * @return the number of times we've seen this group. + */ + public int getCount() { + return count; + } + + /** + * Returns the aggregated fields for this group. + * + * @return the aggregated fields. + */ + public FieldAggregator getFieldAggregator() { + return fieldAggregator; + } + + /** + * Set the aggregated fields for this group. + * + * @param fieldAggregator + * the aggregated fields to set + */ + public void setFieldAggregator(FieldAggregator fieldAggregator) { + this.fieldAggregator = fieldAggregator; + } + + public void aggregateAll(Collection fields) { + fieldAggregator.aggregateAll(fields); + } + + public void aggregateAll(String field, Collection fields) { + fieldAggregator.aggregateAll(field, fields); + } + + /** + * Merge the given group into this group. The attribute visibilities and document visibilities from the other group will be added into this group. The count + * for this group will be incremented by the count of the other group. The aggregated fields of the other group will be merged into the aggregated fields of + * this group. 
+ * + * @param other + * the group to merge + */ + public void merge(Group other) { + this.attributeVisibilities.putAll(other.attributeVisibilities); + this.documentVisibilities.addAll(other.documentVisibilities); + this.count += other.count; + this.fieldAggregator.merge(other.fieldAggregator); + } + + @Override + public String toString() { + return new ToStringBuilder(this).append("attributes", grouping).append("attributeVisibilities", attributeVisibilities) + .append("documentVisibilities", documentVisibilities).append("count", count).append("aggregatedFields", fieldAggregator).toString(); + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/common/grouping/GroupFields.java b/warehouse/query-core/src/main/java/datawave/query/common/grouping/GroupFields.java new file mode 100644 index 0000000000..196db28e23 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/common/grouping/GroupFields.java @@ -0,0 +1,466 @@ +package datawave.query.common.grouping; + +import java.io.Serializable; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.stream.Collectors; + +import org.apache.commons.lang.StringUtils; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonValue; +import com.google.common.collect.HashMultimap; +import com.google.common.collect.Maps; +import com.google.common.collect.Multimap; +import com.google.common.collect.Sets; + +import datawave.query.Constants; +import datawave.query.jexl.JexlASTHelper; + +/** + * Represents a set of fields that have been specified within a {@code #groupby()} function, as well as any fields specified in the functions {@code #sum()}, + * {@code #count()}, {@code #average()}, {@code #min()}, and {@code #max()} that should be used when preforming a group-by operation on documents. This class + * can easily be captured as a parameter string using {@link GroupFields#toString()}, and transformed back into a {@link GroupFields} instance via + * {@link GroupFields#from(String)}. + */ +public class GroupFields implements Serializable { + + private static final String GROUP = "GROUP"; + private static final String SUM = "SUM"; + private static final String COUNT = "COUNT"; + private static final String AVERAGE = "AVERAGE"; + private static final String MIN = "MIN"; + private static final String MAX = "MAX"; + private static final String MODEL_MAP = "REVERSE_MODEL_MAP"; + + private Set groupByFields = new HashSet<>(); + private Set sumFields = new HashSet<>(); + private Set countFields = new HashSet<>(); + private Set averageFields = new HashSet<>(); + private Set minFields = new HashSet<>(); + private Set maxFields = new HashSet<>(); + private Map reverseModelMap = new HashMap<>(); + + /** + * Returns a new {@link GroupFields} parsed the given string. The string is expected to have the format returned by {@link GroupFields#toString()}, but may + * also be a comma-delimited string of fields to group-by to support backwards-compatibility with the legacy format. See below for certain edge cases: + *
+ * <ul>
+ * <li>Given null, null will be returned.</li>
+ * <li>Given an empty or blank string, an empty {@link GroupFields} will be returned.</li>
+ * <li>Given a comma-delimited list of fields, e.g. {@code AGE,GENDER}, a {@link GroupFields} with the fields set as the group-by fields will be
+ * returned.</li>
+ * </ul>
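+ *
+ * For example (hypothetical fields), the string {@code GROUP(AGE,GENDER)|SUM(BALANCE)} would be parsed into a {@link GroupFields} that groups by AGE and
+ * GENDER and sums BALANCE.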
+ * + * @param string + * the string to parse + * @return the parsed {@link GroupFields} + */ + @JsonCreator + public static GroupFields from(String string) { + if (string == null) { + return null; + } + + // Strip whitespaces. + string = StringUtils.deleteWhitespace(string); + + GroupFields groupFields = new GroupFields(); + if (!string.isEmpty()) { + // The string contains group fields in the latest formatting GROUP(field,...)... + if (string.contains(Constants.LEFT_PAREN)) { + // Individual elements are separated by a pipe. + String[] elements = StringUtils.split(string, Constants.PIPE); + + // Each element starts NAME(). + for (String element : elements) { + int leftParen = element.indexOf(Constants.LEFT_PAREN); + int rightParen = element.length() - 1; + String name = element.substring(0, leftParen); + String elementContents = element.substring(leftParen + 1, rightParen); + switch (name) { + case GROUP: + groupFields.groupByFields = parseSet(elementContents); + break; + case SUM: + groupFields.sumFields = parseSet(elementContents); + break; + case COUNT: + groupFields.countFields = parseSet(elementContents); + break; + case AVERAGE: + groupFields.averageFields = parseSet(elementContents); + break; + case MIN: + groupFields.minFields = parseSet(elementContents); + break; + case MAX: + groupFields.maxFields = parseSet(elementContents); + break; + case MODEL_MAP: + groupFields.reverseModelMap = parseMap(elementContents); + break; + default: + throw new IllegalArgumentException("Invalid element " + name); + } + } + } else { + // Otherwise, the string may be in the legacy format of a comma-delimited string with group-fields only. + String[] groupByFields = StringUtils.split(string, Constants.PARAM_VALUE_SEP); + groupFields.setGroupByFields(Sets.newHashSet(groupByFields)); + } + } + return groupFields; + } + + // Parse a set of fields from the string. + private static Set parseSet(String str) { + return Sets.newHashSet(StringUtils.split(str, Constants.COMMA)); + } + + // Parse a map from the given string. + private static Map parseMap(String str) { + Map map = new HashMap<>(); + String[] entries = StringUtils.split(str, Constants.COLON); + for (String entry : entries) { + int equals = entry.indexOf(Constants.EQUALS); + String key = entry.substring(0, equals); + String value = entry.substring(equals + 1); + map.put(key, value); + } + return map; + } + + /** + * Return a copy of the given {@link GroupFields}. + * + * @param other + * the other instance to copy + * @return the copy + */ + public static GroupFields copyOf(GroupFields other) { + if (other == null) { + return null; + } + + GroupFields copy = new GroupFields(); + copy.groupByFields = other.groupByFields == null ? null : Sets.newHashSet(other.groupByFields); + copy.sumFields = other.sumFields == null ? null : Sets.newHashSet(other.sumFields); + copy.countFields = other.countFields == null ? null : Sets.newHashSet(other.countFields); + copy.averageFields = other.averageFields == null ? null : Sets.newHashSet(other.averageFields); + copy.minFields = other.minFields == null ? null : Sets.newHashSet(other.minFields); + copy.maxFields = other.maxFields == null ? null : Sets.newHashSet(other.maxFields); + copy.reverseModelMap = other.reverseModelMap == null ? null : Maps.newHashMap(other.reverseModelMap); + return copy; + } + + /** + * Set the fields to group by. + * + * @param fields + * the fields + */ + public void setGroupByFields(Set fields) { + this.groupByFields = fields; + } + + /** + * Set the fields to sum. 
+ * + * @param fields + * the fields + */ + public void setSumFields(Set fields) { + this.sumFields = fields; + } + + /** + * Set the fields to count. + * + * @param fields + * the fields + */ + public void setCountFields(Set fields) { + this.countFields = fields; + } + + /** + * Set the fields to average. + * + * @param fields + * the fields + */ + public void setAverageFields(Set fields) { + this.averageFields = fields; + } + + /** + * Set the fields to find the min of. + * + * @param fields + * the fields + */ + public void setMinFields(Set fields) { + this.minFields = fields; + } + + /** + * Set the fields to find the max of. + * + * @param fields + * the fields + */ + public void setMaxFields(Set fields) { + this.maxFields = fields; + } + + /** + * Return the fields to group by. + * + * @return the fields + */ + public Set getGroupByFields() { + return groupByFields; + } + + /** + * Return the fields to sum. + * + * @return the fields + */ + public Set getSumFields() { + return sumFields; + } + + /** + * Return the fields to count. + * + * @return the fields + */ + public Set getCountFields() { + return countFields; + } + + /** + * Return the fields to average. + * + * @return the fields + */ + public Set getAverageFields() { + return averageFields; + } + + /** + * Return the fields to find the min of. + * + * @return the fields + */ + public Set getMinFields() { + return minFields; + } + + /** + * Return the fields to find the max of. + * + * @return the fields + */ + public Set getMaxFields() { + return maxFields; + } + + /** + * Return whether this {@link GroupFields} has any fields to group by. + * + * @return true if there are fields to group by, or false otherwise + */ + public boolean hasGroupByFields() { + return groupByFields != null && !groupByFields.isEmpty(); + } + + /** + * Return the set of all fields to group by, sum, count, average, and find the min and max of that must be included in projection. + * + * @return the fields required to be included in projection + */ + public Set getProjectionFields() { + Set fields = new HashSet<>(); + fields.addAll(this.groupByFields); + fields.addAll(this.sumFields); + fields.addAll(this.countFields); + fields.addAll(this.averageFields); + fields.addAll(this.minFields); + fields.addAll(this.maxFields); + fields.addAll(this.reverseModelMap.keySet()); + fields.addAll(this.reverseModelMap.values()); + return fields; + } + + /** + * Deconstruct the identifiers of all fields in this {@link GroupFields}. + */ + public void deconstructIdentifiers() { + this.groupByFields = deconstructIdentifiers(this.groupByFields); + this.sumFields = deconstructIdentifiers(this.sumFields); + this.countFields = deconstructIdentifiers(this.countFields); + this.averageFields = deconstructIdentifiers(this.averageFields); + this.minFields = deconstructIdentifiers(this.minFields); + this.maxFields = deconstructIdentifiers(this.maxFields); + } + + // Return a copy of the given set with all identifiers deconstructed. + private Set deconstructIdentifiers(Set set) { + return set.stream().map(JexlASTHelper::deconstructIdentifier).collect(Collectors.toSet()); + } + + /** + * Modify this {@link GroupFields} to ensure that all sets of fields also include their alternative mappings, and set the model map to the given map. 
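+ * For example (hypothetical model), if the forward model maps {@code AGE} to the aliases {@code AG} and {@code ETA}, and the reverse model maps {@code AG}
+ * and {@code ETA} back to {@code AGE}, then a group-by field of {@code AGE} is first expanded to include {@code AG} and {@code ETA} and then reduced back to
+ * the root name {@code AGE}, while the retained reverse mappings allow document entries named {@code AG} or {@code ETA} to be recognized as {@code AGE} when
+ * grouping.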
+ * + * @param modelMap + * the map to retrieve alternative field mappings from + */ + public void remapFields(Multimap modelMap, Map reverseModelMap) { + this.groupByFields = remap(this.groupByFields, modelMap); + this.sumFields = remap(this.sumFields, modelMap); + this.countFields = remap(this.countFields, modelMap); + this.averageFields = remap(this.averageFields, modelMap); + this.minFields = remap(this.minFields, modelMap); + this.maxFields = remap(this.maxFields, modelMap); + + // Make a copy of the given reverse model map that only contains relevant mappings for efficiency. + Set allFields = new HashSet<>(); + allFields.addAll(groupByFields); + allFields.addAll(sumFields); + allFields.addAll(countFields); + allFields.addAll(averageFields); + allFields.addAll(minFields); + allFields.addAll(maxFields); + + this.reverseModelMap = new HashMap<>(); + for (String field : allFields) { + if (reverseModelMap.containsKey(field)) { + this.reverseModelMap.put(field, reverseModelMap.get(field)); + } + } + + // now we can reduce the fields to only those that map to themselves wrt the reverse model map + this.groupByFields = reduce(this.groupByFields, this.reverseModelMap); + this.sumFields = reduce(this.sumFields, this.reverseModelMap); + this.countFields = reduce(this.countFields, this.reverseModelMap); + this.averageFields = reduce(this.averageFields, this.reverseModelMap); + this.minFields = reduce(this.minFields, this.reverseModelMap); + this.maxFields = reduce(this.maxFields, this.reverseModelMap); + } + + private Set reduce(Set set, Map map) { + return set.stream().filter(s -> s.equals(map.getOrDefault(s, s))).collect(Collectors.toSet()); + } + + // Return a copy of the given set with all alternative field mappings included. + private Set remap(Set set, Multimap map) { + Set newMappings = new HashSet<>(set); + for (String field : set) { + field = field.toUpperCase(); + if (map.containsKey(field)) { + newMappings.addAll(map.get(field)); + } + } + return newMappings; + } + + /** + * Return the model map. This map will never be null, but may be empty if this {@link GroupFields} was never remapped via + * {@link GroupFields#remapFields(Multimap, Map)}. + * + * @return the reverse model map + */ + public Map getReverseModelMap() { + return reverseModelMap; + } + + /** + * Return a new {@link FieldAggregator.Factory} instance configured with the aggregation fields of this {@link GroupFields}. 
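
A hedged walk-through of remapFields(Multimap, Map) as written above, with a hypothetical model in which the generic field AG maps to the concrete fields AGE and ETA.

    Multimap<String,String> modelMap = HashMultimap.create();
    modelMap.putAll("AG", Sets.newHashSet("AGE", "ETA"));
    Map<String,String> reverseModelMap = new HashMap<>();
    reverseModelMap.put("AGE", "AG");
    reverseModelMap.put("ETA", "AG");

    GroupFields groupFields = new GroupFields();
    groupFields.setGroupByFields(Sets.newHashSet("AG"));
    groupFields.remapFields(modelMap, reverseModelMap);

    // remap() first expands the group-by set to {AG, AGE, ETA}; the retained reverse model map is
    // trimmed to {AGE=AG, ETA=AG}; reduce() then keeps only fields that map to themselves, so the
    // group-by set collapses back to {AG} while getReverseModelMap() still exposes the aliases.
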
+ * + * @return a configured {@link FieldAggregator.Factory} instance + */ + public FieldAggregator.Factory getFieldAggregatorFactory() { + return new FieldAggregator.Factory().withSumFields(sumFields).withCountFields(countFields).withAverageFields(averageFields).withMinFields(minFields) + .withMaxFields(maxFields); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + GroupFields that = (GroupFields) o; + return Objects.equals(groupByFields, that.groupByFields) && Objects.equals(sumFields, that.sumFields) && Objects.equals(countFields, that.countFields) + && Objects.equals(averageFields, that.averageFields) && Objects.equals(minFields, that.minFields) + && Objects.equals(maxFields, that.maxFields) && Objects.equals(reverseModelMap, that.reverseModelMap); + } + + @Override + public int hashCode() { + return Objects.hash(groupByFields, sumFields, countFields, averageFields, minFields, maxFields, reverseModelMap); + } + + @JsonValue + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + writeFormattedSet(sb, GROUP, this.groupByFields); + writeFormattedSet(sb, SUM, this.sumFields); + writeFormattedSet(sb, COUNT, this.countFields); + writeFormattedSet(sb, AVERAGE, this.averageFields); + writeFormattedSet(sb, MIN, this.minFields); + writeFormattedSet(sb, MAX, this.maxFields); + writeFormattedModelMap(sb); + return sb.toString(); + } + + // Write the given set if not empty to the given string builder. + private void writeFormattedSet(StringBuilder sb, String name, Set set) { + if (!set.isEmpty()) { + if (sb.length() > 0) { + sb.append(Constants.PIPE); + } + sb.append(name); + sb.append(Constants.LEFT_PAREN); + Iterator iterator = set.iterator(); + while (iterator.hasNext()) { + String next = iterator.next(); + sb.append(next); + if (iterator.hasNext()) { + sb.append(Constants.COMMA); + } + } + sb.append(Constants.RIGHT_PAREN); + } + } + + // Write the model map if not empty to the given string builder. + private void writeFormattedModelMap(StringBuilder sb) { + if (!reverseModelMap.isEmpty()) { + if (sb.length() > 0) { + sb.append(Constants.PIPE); + } + sb.append(MODEL_MAP).append(Constants.LEFT_PAREN); + Iterator> entryIterator = reverseModelMap.entrySet().iterator(); + while (entryIterator.hasNext()) { + Map.Entry next = entryIterator.next(); + sb.append(next.getKey()).append(Constants.EQUALS).append(next.getValue()); + if (entryIterator.hasNext()) { + sb.append(Constants.COLON); + } + } + sb.append(Constants.RIGHT_PAREN); + } + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/common/grouping/Grouping.java b/warehouse/query-core/src/main/java/datawave/query/common/grouping/Grouping.java new file mode 100644 index 0000000000..736dbb4c0d --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/common/grouping/Grouping.java @@ -0,0 +1,118 @@ +package datawave.query.common.grouping; + +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; +import java.util.function.Predicate; + +/** + * This class represents a {@link HashSet} of {@link GroupingAttribute} elements that maintains a cached hashcode that is calculated once at instantiation, and + * subsequently recalculated any time this set is modified. This class is used as a key within maps and as such, the cached hashcode allows us to avoid + * calculating the hashcode each time a search operation is performed on the keys of the maps. 
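
Because from(String) is annotated with @JsonCreator and toString() with @JsonValue, the serialized JSON form is the same NAME(...)|NAME(...) string parsed above. A round-trip sketch, under the same constant-value assumptions as the earlier example:

    GroupFields original = GroupFields.from("GROUP(AGE)|MAX(SALARY)");
    GroupFields rebuilt = GroupFields.from(original.toString());
    // original.equals(rebuilt) is expected to hold; ordering within each set is not significant.
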
+ */ +public class Grouping extends HashSet> { + + public static final Grouping EMPTY_GROUPING = new Grouping(Collections.emptySet()); + + public static Grouping emptyGrouping() { + return EMPTY_GROUPING; + } + + // The cached hashcode. + private int cachedHashcode; + + /** + * Return a new {@link Grouping} instance containing the elements of the given collection. + * + * @param collection + * the collection + * @return the new grouping + */ + public static Grouping of(Collection> collection) { + return new Grouping(collection); + } + + public Grouping() { + super(); + updateCachedHashcode(); + } + + public Grouping(GroupingAttribute attribute) { + super(); + add(attribute); + updateCachedHashcode(); + } + + public Grouping(Collection> collection) { + super(collection); + updateCachedHashcode(); + } + + @Override + public boolean add(GroupingAttribute groupingAttribute) { + boolean modified = super.add(groupingAttribute); + if (modified) { + updateCachedHashcode(); + } + return modified; + } + + @Override + public boolean addAll(Collection> collection) { + boolean modified = super.addAll(collection); + if (modified) { + updateCachedHashcode(); + } + return modified; + } + + @Override + public boolean remove(Object o) { + boolean modified = super.remove(o); + if (modified) { + updateCachedHashcode(); + } + return modified; + } + + @Override + public boolean removeAll(Collection collection) { + boolean modified = super.removeAll(collection); + if (modified) { + updateCachedHashcode(); + } + return modified; + } + + @Override + public boolean removeIf(Predicate> filter) { + boolean modified = super.removeIf(filter); + if (modified) { + updateCachedHashcode(); + } + return modified; + } + + @Override + public void clear() { + super.clear(); + updateCachedHashcode(); + } + + /** + * Returns the cached hashcode. + * + * @return the hashcode + */ + @Override + public int hashCode() { + return cachedHashcode; + } + + /** + * Update the cached hashcode based on the current elements. + */ + private void updateCachedHashcode() { + cachedHashcode = super.hashCode(); + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/common/grouping/GroupingAttribute.java b/warehouse/query-core/src/main/java/datawave/query/common/grouping/GroupingAttribute.java new file mode 100644 index 0000000000..54db8ddf71 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/common/grouping/GroupingAttribute.java @@ -0,0 +1,62 @@ +package datawave.query.common.grouping; + +import org.apache.accumulo.core.data.Key; +import org.apache.commons.lang.builder.HashCodeBuilder; + +import datawave.data.type.Type; +import datawave.query.attributes.Attribute; +import datawave.query.attributes.TypeAttribute; + +/** + * This class serves as a wrapper for the {@link TypeAttribute} that overrides the default {@code equals()} and {@code hashCode()} behavior so that equality is + * determined by the attribute's field and value, and the hashCode is generated solely with the attribute's value. + * + * @param + * the delegate type + */ +public class GroupingAttribute> extends TypeAttribute { + + public GroupingAttribute(Type type, Key key, boolean toKeep) { + super(type, key, toKeep); + } + + /** + * Returns whether the other attribute has the same field and value. 
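
The mutator overrides above exist solely to keep the cached hashcode in sync with the set's contents. A standalone sketch of the same pattern (not the DataWave class itself), to show the intent in isolation:

    import java.util.HashSet;

    // A set that caches its hashcode so repeated map lookups keyed on the set stay cheap.
    class CachedHashSet<E> extends HashSet<E> {
        private int cachedHashcode;

        CachedHashSet() {
            updateCachedHashcode();
        }

        @Override
        public boolean add(E element) {
            boolean modified = super.add(element);
            if (modified) {
                updateCachedHashcode(); // recompute only when the contents actually change
            }
            return modified;
        }

        // addAll, remove, removeAll, removeIf, and clear would refresh the cache the same way.

        @Override
        public int hashCode() {
            return cachedHashcode;
        }

        private void updateCachedHashcode() {
            cachedHashcode = super.hashCode();
        }
    }
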
+ * + * @param other + * the other attribute + * @return true if the attribute is considered equal, or false otherwise + */ + @Override + public boolean equals(Object other) { + if (null == other) { + return false; + } + if (other instanceof TypeAttribute) { + TypeAttribute otherType = (TypeAttribute) other; + return this.getType().equals(otherType.getType()) && isMetadataRowEqual(otherType); + } + return false; + } + + /** + * Return whether the metadata row of this attribute is considered equal to the row of the other attribute. + * + * @param other + * the other attribute + * @return true if the metadata row is equal, or false otherwise + */ + private boolean isMetadataRowEqual(Attribute other) { + return this.isMetadataSet() == other.isMetadataSet() && (!this.isMetadataSet() || (this.getMetadata().getRow().equals(other.getMetadata().getRow()))); + } + + /** + * Returns the hashcode of the attribute's value. + * + * @return the hashcode of the attribute's value + */ + @Override + public int hashCode() { + return new HashCodeBuilder(2099, 2129).append(getType().getDelegateAsString()).toHashCode(); + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/common/grouping/GroupingUtil.java b/warehouse/query-core/src/main/java/datawave/query/common/grouping/GroupingUtil.java deleted file mode 100644 index 2ac3d8a461..0000000000 --- a/warehouse/query-core/src/main/java/datawave/query/common/grouping/GroupingUtil.java +++ /dev/null @@ -1,330 +0,0 @@ -package datawave.query.common.grouping; - -import static org.slf4j.LoggerFactory.getLogger; - -import java.math.BigDecimal; -import java.util.Arrays; -import java.util.Collection; -import java.util.HashMap; -import java.util.HashSet; -import java.util.LinkedHashSet; -import java.util.Map; -import java.util.Set; -import java.util.stream.IntStream; - -import org.apache.accumulo.core.data.Key; -import org.apache.accumulo.core.security.ColumnVisibility; -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.slf4j.Logger; - -import com.google.common.collect.HashMultimap; -import com.google.common.collect.Maps; -import com.google.common.collect.Multimap; -import com.google.common.collect.SortedSetMultimap; -import com.google.common.collect.TreeMultimap; - -import datawave.data.type.Type; -import datawave.marking.MarkingFunctions; -import datawave.query.attributes.Attribute; -import datawave.query.attributes.Document; -import datawave.query.attributes.TypeAttribute; - -/** - * Provides functionality commonly needed to group documents (regardless if done server or client side). - * - * This class and its methods aren't static so that we don't run into concurrency issues, although all required state should be passed into the individual - * methods and not kept in this class. Calling classes could extend this class to inherit the methods, but the state still shouldn't be inherited because not - * all callers will be able to easily extend this class if they already/need to extend other parents. 
- */ -public class GroupingUtil { - - private static final Logger log = getLogger(GroupingUtil.class); - - public ColumnVisibility combine(Collection in, MarkingFunctions markingFunctions) { - try { - ColumnVisibility columnVisibility = markingFunctions.combine(in); - log.trace("combined {} into {}", in, columnVisibility); - return columnVisibility; - } catch (MarkingFunctions.Exception e) { - log.warn("unable to combine visibilities from {}", in); - } - return new ColumnVisibility(); - } - - /** - * This method mutates the countingMap argument that is passed into it. The caller may either anticipate that (and hopefully make a comment when this method - * is called that it is expecting the countingMap to be mutated) or the caller can reset the instance of countingMap by calling getCountingMap on the - * GroupInfo object (clearer, but relies more on garbage collection) - * - * @param entry - * the map entry - * @param groupFieldsSet - * group fields set - * @param countingMap - * the counting map - * @return grouping info - */ - public GroupingInfo getGroupingInfo(Map.Entry entry, Set groupFieldsSet, GroupCountingHashMap countingMap) { - return getGroupingInfo(entry, groupFieldsSet, countingMap, null); - } - - public GroupingInfo getGroupingInfo(Map.Entry entry, Set groupFieldsSet, GroupCountingHashMap countingMap, - Map reverseModelMapping) { - log.trace("apply to {}", entry); - - // mapping of field name (with grouping context) to value attribute - Map> fieldMap = Maps.newHashMap(); - - // holds the aggregated column visibilities for each grouped event - Multimap>,ColumnVisibility> fieldVisibilities = HashMultimap.create(); - - if (entry != null) { - Set expandedGroupFieldsList = new LinkedHashSet<>(); - Map>> dictionary = entry.getValue().getDictionary(); - Map countKeyMap = new HashMap<>(); - dictionary.keySet().stream().filter(key -> key.startsWith("COUNT")).filter(countKey -> entry.getValue().getDictionary().containsKey(countKey)) - .forEach(countKey -> { - TypeAttribute countTypeAttribute = ((TypeAttribute) entry.getValue().getDictionary().get(countKey)); - int count = ((BigDecimal) countTypeAttribute.getType().getDelegate()).intValue(); - countKeyMap.put(countKey, count); - }); - - Multimap fieldToFieldWithContextMap = getFieldToFieldWithGroupingContextMap(entry.getValue(), expandedGroupFieldsList, fieldMap, - groupFieldsSet, reverseModelMapping); - log.trace("got a new fieldToFieldWithContextMap: {}", fieldToFieldWithContextMap); - int longest = longestValueList(fieldToFieldWithContextMap); - for (int i = 0; i < longest; i++) { - Collection> fieldCollection = new HashSet<>(); - String currentGroupingContext = ""; - for (String fieldListItem : expandedGroupFieldsList) { - log.trace("fieldListItem: {}", fieldListItem); - Collection gtNames = fieldToFieldWithContextMap.get(fieldListItem); - if (gtNames == null || gtNames.isEmpty()) { - log.trace("gtNames: {}", gtNames); - log.trace("fieldToFieldWithContextMap: {} did not contain: {}", fieldToFieldWithContextMap, fieldListItem); - } else { - String gtName = gtNames.iterator().next(); - int idx = gtName.indexOf('.'); - if (idx != -1) { - currentGroupingContext = gtName.substring(idx + 1); - } - if (!fieldListItem.equals(gtName)) { - fieldToFieldWithContextMap.remove(fieldListItem, gtName); - } - log.trace("fieldToFieldWithContextMap now: {}", fieldToFieldWithContextMap); - log.trace("gtName: {}", gtName); - fieldCollection.add(fieldMap.get(gtName)); - } - } - - if (fieldCollection.size() == expandedGroupFieldsList.size()) { - - // get 
the count out of the countKeyMap - Integer count = countKeyMap.get("COUNT." + currentGroupingContext); - if (count == null) - count = 1; - // see above comment about the COUNT field - log.trace("adding {} of {} to counting map", count, fieldCollection); - IntStream.range(0, count).forEach(j -> countingMap.add(fieldCollection)); - fieldVisibilities.put(fieldCollection, entry.getValue().getColumnVisibility()); - log.trace("put {} to {} into fieldVisibilities {}", fieldCollection, entry.getValue().getColumnVisibility(), fieldVisibilities); - } else { - log.trace("fieldList.size() != this.expandedGroupFieldsList.size()"); - log.trace("fieldList: {}", fieldCollection); - log.trace("expandedGroupFieldsList: {}", expandedGroupFieldsList); - } - } - - log.trace("countingMap: {}", countingMap); - } - - return new GroupingInfo(countingMap, fieldVisibilities); - } - - private Multimap getFieldToFieldWithGroupingContextMap(Document d, Set expandedGroupFieldsList, - Map> fieldMap, Set groupFieldsSet, Map reverseModelMapping) { - - Multimap fieldToFieldWithContextMap = TreeMultimap.create(); - for (Map.Entry>> entry : d.entrySet()) { - Attribute field = entry.getValue(); - log.trace("field is {}", field); - String fieldName = entry.getKey(); - String shortName = fieldName; - String shorterName = shortName; - if (shortName.indexOf('.') != -1) - shortName = shortName.substring(0, shortName.lastIndexOf('.')); - if (shorterName.indexOf('.') != -1) - shorterName = shorterName.substring(0, shorterName.indexOf('.')); - log.trace("fieldName: {}, shortName: {}", fieldName, shortName); - if (reverseModelMapping != null) { - String finalName = reverseModelMapping.get(shorterName); - if (finalName != null) { - shortName = finalName + shortName.substring(shorterName.length()); - fieldName = finalName + fieldName.substring(shorterName.length()); - shorterName = finalName; - } - } - if (groupFieldsSet.contains(shorterName)) { - expandedGroupFieldsList.add(shortName); - log.trace("{} contains {}", groupFieldsSet, shorterName); - - if (field.getData() instanceof Collection) { - // This handles multivalued entries that do not have grouping context - // Create GroupingTypeAttribute and put in ordered map ordered on the attribute type - SortedSetMultimap,GroupingTypeAttribute> attrSortedMap = TreeMultimap.create(); - for (Object typeAttribute : ((Collection) field.getData())) { - Type type = ((TypeAttribute) typeAttribute).getType(); - GroupingTypeAttribute created = new GroupingTypeAttribute<>(type, new Key(shortName), true); - created.setColumnVisibility(field.getColumnVisibility()); - attrSortedMap.put(type, created); - } - - // Add GroupingTypeAttribute to fieldMap with a grouping context that is based on ordered attribute type - int i = 0; - for (Map.Entry,GroupingTypeAttribute> sortedEntry : attrSortedMap.entries()) { - String fieldNameWithContext = fieldName + "." 
+ i++; - fieldMap.put(fieldNameWithContext, sortedEntry.getValue()); - fieldToFieldWithContextMap.put(shortName, fieldNameWithContext); - } - } else { - GroupingTypeAttribute created = new GroupingTypeAttribute<>((Type) field.getData(), new Key(shortName), true); - created.setColumnVisibility(field.getColumnVisibility()); - fieldMap.put(fieldName, created); - fieldToFieldWithContextMap.put(shortName, fieldName); - } - } else { - log.trace("{} does not contain {}", groupFieldsSet, shorterName); - } - } - log.trace("fieldMap: {}", fieldMap); - log.trace("fields: {}", d.entrySet()); - log.trace("fieldToFieldWithGroupingContextMap: {}", fieldToFieldWithContextMap); - log.trace("expandedGroupFieldsList: {}", expandedGroupFieldsList); - return fieldToFieldWithContextMap; - } - - private static int longestValueList(Multimap in) { - int max = 0; - for (Collection valueCollection : in.asMap().values()) { - max = Math.max(max, valueCollection.size()); - } - return max; - } - - /** - * Provides a clear way to return multiple things related to grouping that are generated from one method. - */ - public static class GroupingInfo { - - private final GroupCountingHashMap countingMap; - - private final Multimap>,ColumnVisibility> fieldVisibilities; - - GroupingInfo(GroupCountingHashMap countingMap, Multimap>,ColumnVisibility> fieldVisibilities) { - this.countingMap = countingMap; - this.fieldVisibilities = fieldVisibilities; - } - - public GroupCountingHashMap getCountsMap() { - return countingMap; - } - - public Multimap>,ColumnVisibility> getFieldVisibilities() { - return fieldVisibilities; - } - } - - public static class GroupCountingHashMap extends HashMap>,Integer> { - - private static final Logger log = getLogger(GroupCountingHashMap.class); - - private MarkingFunctions markingFunctions; - - public GroupCountingHashMap(MarkingFunctions markingFunctions) { - this.markingFunctions = markingFunctions; - } - - public int add(Collection> in) { - int count = 0; - if (super.containsKey(in)) { - count = super.get(in); - // aggregate the visibilities - combine(this.keySet(), in); - } - count++; - super.put(in, count); - return count; - } - - private void combine(Set>> existingMapKeys, Collection> incomingAttributes) { - - // for each Attribute in the incomingAttributes, find the existing map key attribute that matches its data. - // combine the column visibilities of the incoming attribute and the existing one, and set - // the column visibility of the EXISTING map key to the new value. 
- // Note that the hashCode and equals methods for the GroupingTypeAttribute will ignore the metadata (which contains the column visibility) - incomingAttributes.forEach(incomingAttribute -> { - existingMapKeys.stream().flatMap(Collection::stream) - // if the existing and incoming attributes are equal (other than the metadata), the incoming attribute's visibility will be - // considered for merging into the existing attribute unless the column visibilities are already equal - .filter(existingAttribute -> existingAttribute.getData().equals(incomingAttribute.getData()) - && !existingAttribute.getColumnVisibility().equals(incomingAttribute.getColumnVisibility())) - .forEach(existingAttribute -> existingAttribute.setColumnVisibility( - combine(Arrays.asList(existingAttribute.getColumnVisibility(), incomingAttribute.getColumnVisibility())))); - }); - } - - private ColumnVisibility combine(Collection in) { - try { - ColumnVisibility columnVisibility = markingFunctions.combine(in); - log.trace("combined {} into {}", in, columnVisibility); - return columnVisibility; - } catch (MarkingFunctions.Exception e) { - log.warn("was unable to combine visibilities from {}", in); - } - return new ColumnVisibility(); - } - - } - - public static class GroupingTypeAttribute> extends TypeAttribute { - - public GroupingTypeAttribute(Type type, Key key, boolean toKeep) { - super(type, key, toKeep); - } - - @Override - public boolean equals(Object o) { - if (null == o) { - return false; - } - - if (o instanceof TypeAttribute) { - TypeAttribute other = (TypeAttribute) o; - return this.getType().equals(other.getType()) && (0 == this.compareMetadataRow(other)); - } - return false; - } - - private int compareMetadataRow(Attribute other) { - if (this.isMetadataSet() != other.isMetadataSet()) { - if (this.isMetadataSet()) { - return 1; - } else { - return -1; - } - } else if (this.isMetadataSet()) { - return this.metadata.compareRow(other.getMetadata().getRow()); - } else { - return 0; - } - } - - @Override - public int hashCode() { - HashCodeBuilder hcb = new HashCodeBuilder(2099, 2129); - hcb.append(getType().getDelegateAsString()); - return hcb.toHashCode(); - } - } - -} diff --git a/warehouse/query-core/src/main/java/datawave/query/common/grouping/GroupingUtils.java b/warehouse/query-core/src/main/java/datawave/query/common/grouping/GroupingUtils.java new file mode 100644 index 0000000000..02e7a1b846 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/common/grouping/GroupingUtils.java @@ -0,0 +1,267 @@ +package datawave.query.common.grouping; + +import static org.slf4j.LoggerFactory.getLogger; + +import java.math.BigDecimal; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.accumulo.core.data.Key; +import org.apache.accumulo.core.security.ColumnVisibility; +import org.slf4j.Logger; + +import com.google.common.base.Preconditions; + +import datawave.data.type.NumberType; +import datawave.marking.MarkingFunctions; +import datawave.query.attributes.Document; +import datawave.query.attributes.TypeAttribute; + +/** + * This class contains utility functions used by multiple classes for grouping operations. + */ +public class GroupingUtils { + + public enum AverageAggregatorWriteFormat { + AVERAGE, NUMERATOR_AND_DIVISOR + } + + private static final Logger log = getLogger(GroupingUtils.class); + + /** + * Returns a column visibility that results from the combination of all given visibilities using the given {@link MarkingFunctions}. 
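
A hypothetical usage sketch of the failOnError contract described for combineVisibilities. MarkingFunctionsFactory.createMarkingFunctions() is used elsewhere in this change set; the visibility expressions are illustrative only.

    MarkingFunctions markingFunctions = MarkingFunctionsFactory.createMarkingFunctions();
    Set<ColumnVisibility> visibilities = Sets.newHashSet(new ColumnVisibility("A"), new ColumnVisibility("B"));

    // failOnError = false: a combination failure is logged and a blank ColumnVisibility is returned.
    ColumnVisibility lenient = GroupingUtils.combineVisibilities(visibilities, markingFunctions, false);

    // failOnError = true: a combination failure surfaces as an IllegalArgumentException.
    ColumnVisibility strict = GroupingUtils.combineVisibilities(visibilities, markingFunctions, true);
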
+ * + * @param visibilities + * the visibilities to combine + * @param markingFunctions + * the marking functions to combine the visibilities with + * @param failOnError + * if true and the visibilities cannot be combined, an {@link IllegalArgumentException} will be thrown. If false and the visibilities cannot be + * combined, it will be logged and a new, blank {@link ColumnVisibility} will be returned. + * @return the combined column visibility + */ + public static ColumnVisibility combineVisibilities(Collection visibilities, MarkingFunctions markingFunctions, boolean failOnError) { + try { + return markingFunctions.combine(visibilities); + } catch (MarkingFunctions.Exception e) { + if (failOnError) { + throw new IllegalArgumentException("Unable to combine visibilities: " + visibilities, e); + } else { + log.warn("Unable to combine visibilities from {}", visibilities); + } + } + return new ColumnVisibility(); + } + + /** + * Create and return a new {@link Document} with the given group information embedded into it. + * + * @param group + * the group + * @param keys + * the list of iterator keys that have been read + * @param markingFunctions + * the marking functions to use when combining column visibilities + * @param averageWriteFormat + * the format to use when writing aggregated averages to the document + * @return the new document + */ + public static Document createDocument(Group group, List keys, MarkingFunctions markingFunctions, AverageAggregatorWriteFormat averageWriteFormat) { + Preconditions.checkState(!keys.isEmpty(), "No available keys for grouping results"); + + // Use the last (most recent) key so a new iterator will know where to start. + Key key = keys.get(keys.size() - 1); + Document document = new Document(key, true); + + // Set the visibility for the document to the combined visibility of each previous document in which this grouping was seen in. + document.setColumnVisibility(combineVisibilities(group.getDocumentVisibilities(), markingFunctions, true)); + + // Add each of the grouping attributes to the document. + for (GroupingAttribute attribute : group.getGrouping()) { + // Update the visibility to the combined visibilities of each visibility seen for this attribute in a grouping. + attribute.setColumnVisibility(combineVisibilities(group.getVisibilitiesForAttribute(attribute), markingFunctions, false)); + document.put(attribute.getMetadata().getRow().toString(), attribute); + } + + // Add an attribute for the count. + NumberType type = new NumberType(); + type.setDelegate(new BigDecimal(group.getCount())); + TypeAttribute attr = new TypeAttribute<>(type, new Key("count"), true); + document.put("COUNT", attr); + + // Add each aggregated field. + FieldAggregator fieldAggregator = group.getFieldAggregator(); + if (fieldAggregator != null) { + Map>> aggregatorMap = group.getFieldAggregator().getAggregatorMap(); + for (Map.Entry>> entry : aggregatorMap.entrySet()) { + for (Aggregator aggregator : entry.getValue().values()) { + String field = aggregator.getField(); + // Do not include an entry for the aggregation if it is null (indicating that no entries were found to be aggregated). The exception to this + // is + // the #COUNT aggregation. This will return a non-null value of 0 if no entries were found to be aggregated, and can be included in the + // final + // output. 
+ if (aggregator.getAggregation() != null) { + switch (aggregator.getOperation()) { + case SUM: + addSumAggregation(document, field, ((SumAggregator) aggregator), markingFunctions); + break; + case COUNT: + addCountAggregation(document, field, ((CountAggregator) aggregator), markingFunctions); + break; + case MIN: + addMinAggregation(document, field, ((MinAggregator) aggregator)); + break; + case MAX: + addMaxAggregation(document, field, ((MaxAggregator) aggregator)); + break; + case AVERAGE: + switch (averageWriteFormat) { + case AVERAGE: + addAverage(document, field, ((AverageAggregator) aggregator), markingFunctions); + break; + case NUMERATOR_AND_DIVISOR: + addAverageNumeratorAndDivisor(document, field, ((AverageAggregator) aggregator), markingFunctions); + break; + } + break; + } + } + } + } + } + + return document; + } + + /** + * Add the aggregated sum for the specified field to the document. + * + * @param document + * the document + * @param field + * the field + * @param aggregator + * the aggregator + * @param markingFunctions + * the marking functions to use when combining column visibilities + */ + private static void addSumAggregation(Document document, String field, SumAggregator aggregator, MarkingFunctions markingFunctions) { + NumberType type = new NumberType(); + type.setDelegate(aggregator.getAggregation()); + TypeAttribute sumAttribute = new TypeAttribute<>(type, new Key(field + "_sum"), true); + sumAttribute.setColumnVisibility(combineVisibilities(aggregator.getColumnVisibilities(), markingFunctions, false)); + document.put(field + DocumentGrouper.FIELD_SUM_SUFFIX, sumAttribute); + } + + /** + * Add the aggregated count for the specified field to the document. + * + * @param document + * the document + * @param field + * the field + * @param aggregator + * the aggregator + * @param markingFunctions + * the marking functions to use when combining column visibilities + */ + private static void addCountAggregation(Document document, String field, CountAggregator aggregator, MarkingFunctions markingFunctions) { + NumberType type = new NumberType(); + type.setDelegate(BigDecimal.valueOf(aggregator.getAggregation())); + TypeAttribute sumAttribute = new TypeAttribute<>(type, new Key(field + "_count"), true); + Set columnVisibilities = aggregator.getColumnVisibilities(); + if (!columnVisibilities.isEmpty()) { + sumAttribute.setColumnVisibility(combineVisibilities(aggregator.getColumnVisibilities(), markingFunctions, false)); + } + document.put(field + DocumentGrouper.FIELD_COUNT_SUFFIX, sumAttribute); + } + + /** + * Add the aggregated min for the specified field to the document. + * + * @param document + * the document + * @param field + * the field + * @param aggregator + * the aggregator + */ + private static void addMinAggregation(Document document, String field, MinAggregator aggregator) { + document.put(field + DocumentGrouper.FIELD_MIN_SUFFIX, aggregator.getAggregation()); + } + + /** + * Add the aggregated max for the specified field to the document. + * + * @param document + * the document + * @param field + * the field + * @param aggregator + * the aggregator + */ + private static void addMaxAggregation(Document document, String field, MaxAggregator aggregator) { + document.put(field + DocumentGrouper.FIELD_MAX_SUFFIX, aggregator.getAggregation()); + } + + /** + * Add the aggregated average for the specified field to the document. 
+ * + * @param document + * the document + * @param field + * the field + * @param aggregator + * the aggregator + * @param markingFunctions + * the marking functions to use when combining column visibilities + */ + private static void addAverage(Document document, String field, AverageAggregator aggregator, MarkingFunctions markingFunctions) { + NumberType type = new NumberType(); + type.setDelegate(aggregator.getAggregation()); + TypeAttribute attribute = new TypeAttribute<>(type, new Key(field + "_average"), true); + attribute.setColumnVisibility(combineVisibilities(aggregator.getColumnVisibilities(), markingFunctions, false)); + document.put(field + DocumentGrouper.FIELD_AVERAGE_SUFFIX, attribute); + } + + /** + * Add the numerator and divisor of the aggregated average for the specified field to the document. + * + * @param document + * the document + * @param field + * the field + * @param aggregator + * the aggregator + * @param markingFunctions + * the marking functions to use when combining column visibilities + */ + private static void addAverageNumeratorAndDivisor(Document document, String field, AverageAggregator aggregator, MarkingFunctions markingFunctions) { + ColumnVisibility visibility = combineVisibilities(aggregator.getColumnVisibilities(), markingFunctions, false); + + // Add an attribute for the average's numerator. This is required to properly combine additional aggregations in future groupings. + NumberType numeratorType = new NumberType(); + numeratorType.setDelegate(aggregator.getNumerator()); + TypeAttribute sumAttribute = new TypeAttribute<>(numeratorType, new Key(field + "_average_numerator"), true); + sumAttribute.setColumnVisibility(visibility); + document.put(field + DocumentGrouper.FIELD_AVERAGE_NUMERATOR_SUFFIX, sumAttribute); + + // Add an attribute for the average's divisor. This is required to properly combine additional aggregations in future groupings. + NumberType divisorType = new NumberType(); + divisorType.setDelegate(aggregator.getDivisor()); + TypeAttribute countAttribute = new TypeAttribute<>(divisorType, new Key(field + "_average_divisor"), true); + countAttribute.setColumnVisibility(visibility); + document.put(field + DocumentGrouper.FIELD_AVERAGE_DIVISOR_SUFFIX, countAttribute); + } + + /** + * Do not allow new instances of this class to be created. + */ + private GroupingUtils() { + throw new UnsupportedOperationException(); + } + +} diff --git a/warehouse/query-core/src/main/java/datawave/query/common/grouping/Groups.java b/warehouse/query-core/src/main/java/datawave/query/common/grouping/Groups.java new file mode 100644 index 0000000000..f151e1bff3 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/common/grouping/Groups.java @@ -0,0 +1,108 @@ +package datawave.query.common.grouping; + +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; + +/** + * Represents a set of groups found during a #GROUP_BY operation. + */ +public class Groups { + + /** + * A map of distinct grouping values to their groups. + */ + private final Map groups = new HashMap<>(); + + /** + * Returns the collection of {@link Group} in this {@link Groups}. + * + * @return the groups + */ + public Collection getGroups() { + return groups.values(); + } + + public int totalGroups() { + return groups.size(); + } + + /** + * Return whether this {@link Groups} contains a {@link Group} for the given grouping values. 
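
A small worked illustration of why the numerator/divisor form is preserved rather than the computed average when results may be combined across groupings later (the partition values are made up):

    // Partition 1 aggregates 10, 20, 30 -> numerator 60, divisor 3 (average 20)
    // Partition 2 aggregates 100        -> numerator 100, divisor 1 (average 100)
    BigDecimal numerator = BigDecimal.valueOf(60).add(BigDecimal.valueOf(100)); // 160
    BigDecimal divisor = BigDecimal.valueOf(3).add(BigDecimal.valueOf(1));      // 4
    BigDecimal combined = numerator.divide(divisor);                            // 40, the true average
    // Re-averaging the partial averages instead would give (20 + 100) / 2 = 60, over-weighting partition 2.
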
+ * + * @param attributes + * the grouping values + * @return true if this {@link Groups} contains a {@link Group} for the given grouping values, or false otherwise + */ + public boolean containsGroup(Grouping attributes) { + return groups.containsKey(attributes); + } + + /** + * Return the {@link Group} for the given grouping values, or null if there is no match. + * + * @param attributes + * the grouping values + * @return the {@link Group} + */ + public Group getGroup(Grouping attributes) { + return groups.get(attributes); + } + + /** + * Put the given {@link Group} into this {@link Groups} + * + * @param group + * the group to put + */ + public void putGroup(Group group) { + this.groups.put(group.getGrouping(), group); + } + + /** + * If this {@link Groups} already contains a {@link Group} with the grouping values of the given group, the given group will be merged into the existing + * group. Otherwise, the given group will be put into this {@link Groups}. + * + * @param group + * the group to merge or put + */ + public void mergeOrPutGroup(Group group) { + if (containsGroup(group.getGrouping())) { + Group existing = getGroup(group.getGrouping()); + existing.merge(group); + } else { + putGroup(group); + } + } + + public void mergeAll(Groups currentGroups) { + currentGroups.groups.values().forEach(this::mergeOrPutGroup); + } + + /** + * Return whether this {@link Groups} contains any groups. + * + * @return true if this {@link Groups} does not contain any groups, or false otherwise + */ + public boolean isEmpty() { + return this.groups.isEmpty(); + } + + /** + * Clears all groups in this {@link Groups}. + */ + public void clear() { + this.groups.clear(); + } + + @Override + public String toString() { + return groups.toString(); + } + + public void aggregateToAllGroups(Collection fields) { + for (Group group : groups.values()) { + group.aggregateAll(fields); + } + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/common/grouping/ImmutableGrouping.java b/warehouse/query-core/src/main/java/datawave/query/common/grouping/ImmutableGrouping.java new file mode 100644 index 0000000000..a66a9e634d --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/common/grouping/ImmutableGrouping.java @@ -0,0 +1,95 @@ +package datawave.query.common.grouping; + +import java.util.Collection; +import java.util.function.Predicate; + +/** + * This class represents an immutable version of {@link Grouping} that cannot be modified. + */ +public class ImmutableGrouping extends Grouping { + + public ImmutableGrouping(Collection> collection) { + super(); + for (GroupingAttribute groupingAttribute : collection) { + // Do not use super.addAll, otherwise ImmutableGrouping.add() will be subsequently called and an exception will be thrown. + // noinspection UseBulkOperation + super.add(groupingAttribute); + } + } + + /** + * Throws {@link UnsupportedOperationException}. + * + * @param groupingAttribute + * element whose presence in this collection is to be ensured + * @throws UnsupportedOperationException + * always + */ + @Override + public boolean add(GroupingAttribute groupingAttribute) { + throw new UnsupportedOperationException(); + } + + /** + * Throws {@link UnsupportedOperationException}. 
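
A sketch of how the merge methods on Groups are typically driven; partialGroups stands in for the Groups built from one batch of grouped documents, and the variable names are hypothetical.

    Groups totals = new Groups();
    Groups partialGroups = new Groups(); // in practice, populated by DocumentGrouper.group(...) per batch
    // Fold the batch into the running totals: groups with the same distinct grouping values are
    // merged via Group.merge(), and any new grouping is simply put.
    totals.mergeAll(partialGroups);
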
+ * + * @param collection + * collection containing elements to be added to this collection + * @throws UnsupportedOperationException + * always + */ + @Override + public boolean addAll(Collection> collection) { + throw new UnsupportedOperationException(); + } + + /** + * Throws {@link UnsupportedOperationException}. + * + * @param o + * object to be removed from this set, if present + * @throws UnsupportedOperationException + * always + */ + @Override + public boolean remove(Object o) { + throw new UnsupportedOperationException(); + } + + /** + * Throws {@link UnsupportedOperationException}. + * + * @param collection + * collection containing elements to be removed from this set + * @throws UnsupportedOperationException + * always + */ + @Override + public boolean removeAll(Collection collection) { + throw new UnsupportedOperationException(); + } + + /** + * Throws {@link UnsupportedOperationException}. + * + * @param filter + * a predicate which returns {@code true} for elements to be removed + * @throws UnsupportedOperationException + * always + */ + @Override + public boolean removeIf(Predicate> filter) { + throw new UnsupportedOperationException(); + } + + /** + * Throws {@link UnsupportedOperationException}. + * + * @throws UnsupportedOperationException + * always + */ + @Override + public void clear() { + throw new UnsupportedOperationException(); + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/common/grouping/MaxAggregator.java b/warehouse/query-core/src/main/java/datawave/query/common/grouping/MaxAggregator.java new file mode 100644 index 0000000000..da9f538040 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/common/grouping/MaxAggregator.java @@ -0,0 +1,110 @@ +package datawave.query.common.grouping; + +import java.util.Collections; +import java.util.Set; + +import org.apache.accumulo.core.security.ColumnVisibility; +import org.apache.commons.lang.builder.ToStringBuilder; + +import datawave.query.attributes.Attribute; + +/** + * Determines the max of aggregated field values. This supports fields that have {@link datawave.query.attributes.Numeric} values, + * {@link datawave.query.attributes.DateContent} values, and values that have {@link String} data types. + */ +public class MaxAggregator extends AbstractAggregator> { + + private Attribute max; + + public static MaxAggregator of(String field, Attribute max) { + return new MaxAggregator(field, max); + } + + public MaxAggregator(String field) { + super(field); + } + + private MaxAggregator(String field, Attribute max) { + this(field); + this.max = max; + } + + /** + * Returns {@link AggregateOperation#MAX}. + * + * @return {@link AggregateOperation#MAX} + */ + @Override + public AggregateOperation getOperation() { + return AggregateOperation.MAX; + } + + /** + * Returns a singleton set containing the column visibility of the max attribute found. Possible empty, but never null. + * + * @return a set containing the column visibility + */ + @Override + public Set getColumnVisibilities() { + if (max != null) { + return Collections.singleton(max.getColumnVisibility()); + } + return Collections.emptySet(); + } + + /** + * Return the attribute with the max value seen of all attributes aggregated into this aggregator. 
+ * + * @return the attribute, or null if no attributes have been aggregated yet + */ + @Override + public Attribute getAggregation() { + return this.max; + } + + @Override + public boolean hasAggregation() { + return max != null; + } + + /** + * Compares the given value to the current max in this aggregator. If no max has been established yet, or if the given value is greater than the current + * max, the given value will be retained as the new max. Otherwise, the current max will remain the same. + * + * @param value + * the value to aggregate + * @throws IllegalArgumentException + * if a value of a different {@link Attribute} than that of the current max is provided + */ + @SuppressWarnings({"rawtypes", "unchecked"}) + @Override + public void aggregate(Attribute value) { + if (this.max == null) { + this.max = value; + } else { + try { + Comparable maxCopy = this.max.copy(); + int compare = maxCopy.compareTo(value.copy()); + if (compare < 0) { + this.max = value; + } + } catch (Exception e) { + throw new IllegalArgumentException("Failed to compare current max '" + this.max.getData() + "' to new value '" + value.getData() + "'", e); + } + } + } + + @Override + public void merge(Aggregator other) { + if (other instanceof MaxAggregator) { + aggregate(((MaxAggregator) other).max); + } else { + throw new IllegalArgumentException("Cannot merge instance of " + other.getClass().getName()); + } + } + + @Override + public String toString() { + return new ToStringBuilder(this).append("field", field).append("max", max).toString(); + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/common/grouping/MinAggregator.java b/warehouse/query-core/src/main/java/datawave/query/common/grouping/MinAggregator.java new file mode 100644 index 0000000000..2a8d4a1731 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/common/grouping/MinAggregator.java @@ -0,0 +1,113 @@ +package datawave.query.common.grouping; + +import java.util.Collections; +import java.util.Set; + +import org.apache.accumulo.core.security.ColumnVisibility; +import org.apache.commons.lang.builder.ToStringBuilder; + +import datawave.query.attributes.Attribute; + +/** + * Determines the min of aggregated field values. This supports fields that have {@link datawave.query.attributes.Numeric} values, + * {@link datawave.query.attributes.DateContent} values, and values that have {@link String} data types. + */ +public class MinAggregator extends AbstractAggregator> { + + /** + * The current min attribute. + */ + private Attribute min; + + public static MinAggregator of(String field, Attribute min) { + return new MinAggregator(field, min); + } + + public MinAggregator(String field) { + super(field); + } + + private MinAggregator(String field, Attribute min) { + super(field); + this.min = min; + } + + /** + * Returns {@link AggregateOperation#MIN}. + * + * @return {@link AggregateOperation#MIN} + */ + @Override + public AggregateOperation getOperation() { + return AggregateOperation.MIN; + } + + /** + * Returns a singleton set containing the column visibility of the min attribute found. Possible empty, but never null. + * + * @return a set containing the column visibility + */ + @Override + public Set getColumnVisibilities() { + if (min != null) { + return Collections.singleton(min.getColumnVisibility()); + } + return Collections.emptySet(); + } + + /** + * Return the attribute with the min value seen of all attributes aggregated into this aggregator. 
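
A brief usage sketch of the aggregate/merge contract shared by these aggregators. The field name is hypothetical, and in practice the aggregated attributes come from the documents being grouped.

    MaxAggregator partial1 = new MaxAggregator("SALARY");
    MaxAggregator partial2 = new MaxAggregator("SALARY");
    // ... each partial aggregator sees the SALARY attributes from a different set of documents ...

    // Merging keeps the larger of the two maxima, so aggregating in two passes and merging is
    // equivalent to aggregating every attribute into a single MaxAggregator.
    partial1.merge(partial2);

    // Merging a different aggregator type (e.g. a MinAggregator) throws IllegalArgumentException.
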
+ * + * @return the attribute, or null if no attributes have been aggregated yet + */ + @Override + public Attribute getAggregation() { + return min; + } + + @Override + public boolean hasAggregation() { + return min != null; + } + + /** + * Compares the given value to the current min in this aggregator. If no min has been established yet, or if the given value is less than the current min, + * the given value will be retained as the new min. Otherwise, the current min will remain the same. + * + * @param value + * the value to aggregate + * @throws IllegalArgumentException + * if a value of a different {@link Attribute} than that of the current min is provided + */ + @SuppressWarnings({"rawtypes", "unchecked"}) + @Override + public void aggregate(Attribute value) { + if (this.min == null) { + this.min = value; + } else { + try { + Comparable minCopy = this.min.copy(); + int compare = minCopy.compareTo(value.copy()); + if (compare > 0) { + this.min = value; + } + } catch (Exception e) { + throw new IllegalArgumentException("Failed to compare current min '" + this.min.getData() + "' to new value '" + value.getData() + "'", e); + } + } + } + + @Override + public void merge(Aggregator other) { + if (other instanceof MinAggregator) { + aggregate(((MinAggregator) other).min); + } else { + throw new IllegalArgumentException("Cannot merge instance of " + other.getClass().getName()); + } + } + + @Override + public String toString() { + return new ToStringBuilder(this).append("field", field).append("min", min).toString(); + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/common/grouping/SumAggregator.java b/warehouse/query-core/src/main/java/datawave/query/common/grouping/SumAggregator.java new file mode 100644 index 0000000000..888544af9f --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/common/grouping/SumAggregator.java @@ -0,0 +1,117 @@ +package datawave.query.common.grouping; + +import java.math.BigDecimal; +import java.util.Collections; +import java.util.HashSet; +import java.util.Set; + +import org.apache.accumulo.core.security.ColumnVisibility; +import org.apache.commons.lang.builder.ToStringBuilder; + +import datawave.query.attributes.Attribute; +import datawave.query.attributes.Numeric; +import datawave.query.attributes.TypeAttribute; + +/** + * Calculates the sum of aggregated field values. This is limited to fields for which their values can be parsed as {@link BigDecimal} instances. + */ +public class SumAggregator extends AbstractAggregator { + + /** + * The current sum. + */ + private BigDecimal sum; + + /** + * The column visibilities of all attributes aggregated. + */ + private final Set columnVisibilities; + + public static SumAggregator of(String field, TypeAttribute attribute) { + BigDecimal sum = attribute.getType().getDelegate(); + return new SumAggregator(field, sum, attribute.getColumnVisibility()); + } + + public SumAggregator(String field) { + super(field); + this.columnVisibilities = new HashSet<>(); + } + + private SumAggregator(String field, BigDecimal sum, ColumnVisibility visibility) { + this(field); + this.sum = sum; + if (visibility != null) { + this.columnVisibilities.add(visibility); + } + } + + /** + * Returns {@link AggregateOperation#SUM}. 
+ * + * @return {@link AggregateOperation#SUM} + */ + @Override + public AggregateOperation getOperation() { + return AggregateOperation.SUM; + } + + @Override + public Set getColumnVisibilities() { + return Collections.unmodifiableSet(columnVisibilities); + } + + /** + * Return the sum of all values seen for the field. + * + * @return the sum, or null if no values were aggregated + */ + @Override + public BigDecimal getAggregation() { + return sum; + } + + @Override + public boolean hasAggregation() { + return sum != null; + } + + /** + * Adds the value into the current sum. + * + * @param value + * the value to aggregate + * @throws IllegalArgumentException + * if the given value is not a {@link Numeric} type + */ + @Override + public void aggregate(Attribute value) { + BigDecimal number; + try { + number = new BigDecimal(value.getData().toString()); + } catch (Exception e) { + throw new IllegalArgumentException("Unable to calculate a sum with non-numerical value '" + value.getData() + "'", e); + } + if (sum == null) { + sum = number; + } else { + sum = sum.add(number); + } + columnVisibilities.add(value.getColumnVisibility()); + } + + @Override + public void merge(Aggregator other) { + if (other instanceof SumAggregator) { + SumAggregator aggregator = (SumAggregator) other; + this.sum = this.sum.add(aggregator.sum); + this.columnVisibilities.addAll(aggregator.columnVisibilities); + } else { + throw new IllegalArgumentException("Cannot merge instance of " + other.getAggregation()); + } + } + + @Override + public String toString() { + return new ToStringBuilder(this).append("field", field).append("sum", sum).append("columnVisibilities", columnVisibilities).toString(); + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/config/ShardQueryConfiguration.java b/warehouse/query-core/src/main/java/datawave/query/config/ShardQueryConfiguration.java index 36283459eb..3c8bd932ce 100644 --- a/warehouse/query-core/src/main/java/datawave/query/config/ShardQueryConfiguration.java +++ b/warehouse/query-core/src/main/java/datawave/query/config/ShardQueryConfiguration.java @@ -40,6 +40,7 @@ import datawave.query.QueryParameters; import datawave.query.attributes.ExcerptFields; import datawave.query.attributes.UniqueFields; +import datawave.query.common.grouping.GroupFields; import datawave.query.function.DocumentPermutation; import datawave.query.iterator.QueryIterator; import datawave.query.iterator.ivarator.IvaratorCacheDirConfig; @@ -358,10 +359,12 @@ public class ShardQueryConfiguration extends GenericQueryConfiguration implement private boolean compressServerSideResults = false; private boolean indexOnlyFilterFunctionsEnabled = false; private boolean compositeFilterFunctionsEnabled = false; - + /** + * The fields to group by and aggregate. + */ + private GroupFields groupFields = new GroupFields(); private int groupFieldsBatchSize; private boolean accrueStats = false; - private Set groupFields = new HashSet<>(0); private UniqueFields uniqueFields = new UniqueFields(); private boolean cacheModel = false; /** @@ -618,7 +621,6 @@ public ShardQueryConfiguration(ShardQueryConfiguration other) { this.setCompositeFilterFunctionsEnabled(other.isCompositeFilterFunctionsEnabled()); this.setGroupFieldsBatchSize(other.getGroupFieldsBatchSize()); this.setAccrueStats(other.getAccrueStats()); - this.setGroupFields(null == other.getGroupFields() ? 
null : Sets.newHashSet(other.getGroupFields())); this.setUniqueFields(UniqueFields.copyOf(other.getUniqueFields())); this.setCacheModel(other.getCacheModel()); this.setTrackSizes(other.isTrackSizes()); @@ -647,6 +649,7 @@ public ShardQueryConfiguration(ShardQueryConfiguration other) { this.setLazySetMechanismEnabled(other.isLazySetMechanismEnabled()); this.setDocAggregationThresholdMs(other.getDocAggregationThresholdMs()); this.setTfAggregationThresholdMs(other.getTfAggregationThresholdMs()); + this.setGroupFields(GroupFields.copyOf(other.getGroupFields())); this.setPruneQueryOptions(other.getPruneQueryOptions()); } @@ -1658,19 +1661,6 @@ public void setFailOutsideValidDateRange(boolean failOutsideValidDateRange) { this.failOutsideValidDateRange = failOutsideValidDateRange; } - public Set getGroupFields() { - return groupFields; - } - - public void setGroupFields(Set groupFields) { - this.groupFields = deconstruct(groupFields); - } - - @JsonIgnore - public String getGroupFieldsAsString() { - return StringUtils.join(this.getGroupFields(), Constants.PARAM_VALUE_SEP); - } - public int getGroupFieldsBatchSize() { return groupFieldsBatchSize; } @@ -2498,6 +2488,18 @@ public void setTfAggregationThresholdMs(int tfAggregationThresholdMs) { this.tfAggregationThresholdMs = tfAggregationThresholdMs; } + public GroupFields getGroupFields() { + return groupFields; + } + + public void setGroupFields(GroupFields groupFields) { + this.groupFields = groupFields; + // Make sure the fields are deconstructed by this point. + if (this.groupFields != null) { + this.groupFields.deconstructIdentifiers(); + } + } + public boolean getPruneQueryOptions() { return pruneQueryOptions; } diff --git a/warehouse/query-core/src/main/java/datawave/query/iterator/GroupingIterator.java b/warehouse/query-core/src/main/java/datawave/query/iterator/GroupingIterator.java index c90c5279e7..557bfa5da6 100644 --- a/warehouse/query-core/src/main/java/datawave/query/iterator/GroupingIterator.java +++ b/warehouse/query-core/src/main/java/datawave/query/iterator/GroupingIterator.java @@ -2,37 +2,31 @@ import static org.slf4j.LoggerFactory.getLogger; -import java.math.BigDecimal; import java.util.AbstractMap; import java.util.ArrayList; -import java.util.Collection; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Set; -import java.util.stream.Collectors; import org.apache.accumulo.core.data.Key; import org.apache.accumulo.core.iterators.YieldCallback; import org.apache.accumulo.core.security.ColumnVisibility; import org.slf4j.Logger; -import org.springframework.util.Assert; -import com.google.common.collect.HashMultimap; import com.google.common.collect.Maps; -import com.google.common.collect.Multimap; -import datawave.data.type.NumberType; import datawave.marking.MarkingFunctions; import datawave.query.attributes.Attribute; import datawave.query.attributes.Document; import datawave.query.attributes.TypeAttribute; -import datawave.query.common.grouping.GroupingUtil; -import datawave.query.common.grouping.GroupingUtil.GroupCountingHashMap; -import datawave.query.common.grouping.GroupingUtil.GroupingTypeAttribute; -import datawave.query.jexl.JexlASTHelper; +import datawave.query.common.grouping.DocumentGrouper; +import datawave.query.common.grouping.Group; +import datawave.query.common.grouping.GroupFields; +import datawave.query.common.grouping.GroupingUtils; +import datawave.query.common.grouping.Groups; /** * Because the t-server may tear 
down and start a new iterator at any time after a next() call, there can be no saved state in this class. For that reason, each @@ -43,22 +37,14 @@ public class GroupingIterator implements Iterator> { private static final Logger log = getLogger(GroupingIterator.class); /** - * the fields (user provided) to group by + * The fields to group and aggregate by. */ - private final Set groupFieldsSet; + private final GroupFields groupFields; /** - * A map of TypeAttribute collection keys to integer counts This map uses a special key type that ignores the metadata (with visibilities) in its hashCode - * and equals methods + * The groups. This is updated each time */ - private GroupCountingHashMap countingMap; - - /** - * holds the aggregated column visibilities for each grouped event - */ - private Multimap>,ColumnVisibility> fieldVisibilities = HashMultimap.create(); - - private final GroupingUtil groupingUtil = new GroupingUtil(); + private final Groups groups; /** * list of keys that have been read, in order to keep track of where we left off when a new iterator is created @@ -73,27 +59,16 @@ public class GroupingIterator implements Iterator> { private final Iterator> previousIterators; - private final LinkedList documents = new LinkedList<>(); - Map.Entry next; - /** - * Length of time in milliseconds that a client will wait for a results to be returned. If a result is not collected before the timeout, a key with an - * "intermediate" document will be returned. - */ - private final long resultTimeout; - - public GroupingIterator(Iterator> previousIterators, MarkingFunctions markingFunctions, Collection groupFieldsSet, - int groupFieldsBatchSize, YieldCallback yieldCallback, long resultTimeout) { + public GroupingIterator(Iterator> previousIterators, MarkingFunctions markingFunctions, GroupFields groupFields, + int groupFieldsBatchSize, YieldCallback yieldCallback) { this.previousIterators = previousIterators; this.markingFunctions = markingFunctions; - this.groupFieldsSet = groupFieldsSet.stream().map(JexlASTHelper::deconstructIdentifier).collect(Collectors.toSet()); + this.groupFields = groupFields; this.groupFieldsBatchSize = groupFieldsBatchSize; this.yieldCallback = yieldCallback; - - this.countingMap = new GroupCountingHashMap(this.markingFunctions); - - this.resultTimeout = resultTimeout; + this.groups = new Groups(); } @Override @@ -103,15 +78,12 @@ public boolean hasNext() { Map.Entry entry = previousIterators.next(); if (entry != null) { log.trace("t-server get list key counts for: {}", entry); - keys.add(entry.getKey()); - GroupingUtil.GroupingInfo groupingInfo = groupingUtil.getGroupingInfo(entry, groupFieldsSet, this.countingMap); - this.countingMap = groupingInfo.getCountsMap(); - this.fieldVisibilities = groupingInfo.getFieldVisibilities(); + DocumentGrouper.group(entry, groupFields, groups); } } else if (yieldCallback != null && yieldCallback.hasYielded()) { log.trace("hasNext is false because yield was called"); - if (countingMap != null && !countingMap.isEmpty()) { + if (!groups.isEmpty()) { // reset the yield and use its key in the flattened document prepared below keys.add(yieldCallback.getPositionAndReset()); } @@ -122,36 +94,14 @@ public boolean hasNext() { } } + LinkedList documents = new LinkedList<>(); Document document = null; next = null; - if (countingMap != null && !countingMap.isEmpty()) { - - log.trace("hasNext() will use the countingMap: {}", countingMap); - - for (Collection> entry : countingMap.keySet()) { - log.trace("from countingMap, got entry: {}", entry); 
- ColumnVisibility columnVisibility; - try { - columnVisibility = groupingUtil.combine(fieldVisibilities.get(entry), markingFunctions); - } catch (Exception e) { - throw new IllegalStateException("Unable to merge column visibilities: " + fieldVisibilities.get(entry), e); - } - // grab a key from those saved during getListKeyCounts - Assert.notEmpty(keys, "no available keys for grouping results"); - // use the last (most recent) key so a new iterator will know where to start - Key docKey = keys.get(keys.size() - 1); - Document d = new Document(docKey, true); - d.setColumnVisibility(columnVisibility); - - entry.forEach(base -> d.put(base.getMetadata().getRow().toString(), base)); - NumberType type = new NumberType(); - type.setDelegate(new BigDecimal(countingMap.get(entry))); - TypeAttribute attr = new TypeAttribute<>(type, new Key("count"), true); - d.put("COUNT", attr); - documents.add(d); + if (!groups.isEmpty()) { + for (Group group : groups.getGroups()) { + documents.add(GroupingUtils.createDocument(group, keys, markingFunctions, GroupingUtils.AverageAggregatorWriteFormat.NUMERATOR_AND_DIVISOR)); } - // flatten to just one document document = flatten(documents); } @@ -165,7 +115,7 @@ public boolean hasNext() { } next = Maps.immutableEntry(key, document); log.trace("hasNext {}", next); - countingMap.clear(); + groups.clear(); return true; } @@ -229,30 +179,30 @@ public Map.Entry next() { * @return a flattened document */ private Document flatten(List documents) { - log.trace("flatten {}", documents); - Document theDocument = new Document(documents.get(documents.size() - 1).getMetadata(), true); + log.trace("Flattening {}", documents); + + Document flattened = new Document(documents.get(documents.size() - 1).getMetadata(), true); + int context = 0; Set visibilities = new HashSet<>(); for (Document document : documents) { log.trace("document: {}", document); for (Map.Entry>> entry : document.entrySet()) { - String name = entry.getKey(); visibilities.add(entry.getValue().getColumnVisibility()); - + // Add a copy of each attribute to the flattened document with the context appended to the key, e.g. AGE becomes AGE.0. Attribute> attribute = entry.getValue(); attribute.setColumnVisibility(entry.getValue().getColumnVisibility()); - // call copy() on the GroupingTypeAttribute to get a plain TypeAttribute - // instead of a GroupingTypeAttribute that is package protected and won't serialize - theDocument.put(name + "." + Integer.toHexString(context).toUpperCase(), (TypeAttribute) attribute.copy(), true, false); + // Call copy() on the GroupingTypeAttribute to get a plain TypeAttribute instead of a GroupingTypeAttribute that is package protected and won't + // serialize. + flattened.put(entry.getKey() + "." + Integer.toHexString(context).toUpperCase(), (TypeAttribute) attribute.copy(), true, false); } + // Increment the context by one. context++; } - ColumnVisibility combinedVisibility = groupingUtil.combine(visibilities, markingFunctions); - log.trace("combined visibilities: {} to {}", visibilities, combinedVisibility); - theDocument.setColumnVisibility(combinedVisibility); - // documents.clear(); - log.trace("flattened document: {}", theDocument); - return theDocument; - } + // Set the flattened document's visibility to the combined visibilities of each document. 
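As a concrete illustration of what flatten() produces, assuming hypothetical field values and that each per-group document still carries a COUNT attribute as in the previous implementation:

    // Given two per-group documents
    //   document 0: AGE=20, GENDER=FEMALE, COUNT=3
    //   document 1: AGE=24, GENDER=MALE,   COUNT=1
    // the flattened result is a single document whose attribute names carry the group index
    // as an uppercase hex suffix:
    //   AGE.0=20, GENDER.0=FEMALE, COUNT.0=3, AGE.1=24, GENDER.1=MALE, COUNT.1=1
    // with its column visibility set to the combination of both documents' visibilities.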
+ flattened.setColumnVisibility(GroupingUtils.combineVisibilities(visibilities, markingFunctions, false)); + log.trace("flattened document: {}", flattened); + return flattened; + } } diff --git a/warehouse/query-core/src/main/java/datawave/query/iterator/QueryIterator.java b/warehouse/query-core/src/main/java/datawave/query/iterator/QueryIterator.java index a6543735ff..6f8a31f3b6 100644 --- a/warehouse/query-core/src/main/java/datawave/query/iterator/QueryIterator.java +++ b/warehouse/query-core/src/main/java/datawave/query/iterator/QueryIterator.java @@ -1575,11 +1575,11 @@ protected UniqueTransform getUniqueTransform() { } protected GroupingIterator getGroupingIteratorInstance(Iterator> in) { - if (groupingIterator == null && getGroupFields() != null && !getGroupFields().isEmpty()) { + if (groupingIterator == null && getGroupFields() != null && getGroupFields().hasGroupByFields()) { synchronized (getGroupFields()) { if (groupingIterator == null) { groupingIterator = new GroupingIterator(in, MarkingFunctionsFactory.createMarkingFunctions(), getGroupFields(), this.groupFieldsBatchSize, - this.yield, this.getResultTimeout()); + this.yield); } } } diff --git a/warehouse/query-core/src/main/java/datawave/query/iterator/QueryOptions.java b/warehouse/query-core/src/main/java/datawave/query/iterator/QueryOptions.java index bad9ebca61..ea163a8fca 100644 --- a/warehouse/query-core/src/main/java/datawave/query/iterator/QueryOptions.java +++ b/warehouse/query-core/src/main/java/datawave/query/iterator/QueryOptions.java @@ -62,6 +62,7 @@ import datawave.query.attributes.Document; import datawave.query.attributes.ExcerptFields; import datawave.query.attributes.UniqueFields; +import datawave.query.common.grouping.GroupFields; import datawave.query.composite.CompositeMetadata; import datawave.query.function.ConfiguredFunction; import datawave.query.function.DocumentPermutation; @@ -288,7 +289,7 @@ public class QueryOptions implements OptionDescriber { protected boolean limitFieldsPreQueryEvaluation = false; protected String limitFieldsField = null; - protected Set groupFields = Sets.newHashSet(); + protected GroupFields groupFields = new GroupFields(); protected int groupFieldsBatchSize = Integer.MAX_VALUE; protected UniqueFields uniqueFields = new UniqueFields(); @@ -1011,11 +1012,11 @@ public void setLimitFieldsField(String limitFieldsField) { this.limitFieldsField = limitFieldsField; } - public Set getGroupFields() { + public GroupFields getGroupFields() { return groupFields; } - public void setGroupFields(Set groupFields) { + public void setGroupFields(GroupFields groupFields) { this.groupFields = groupFields; } @@ -1131,7 +1132,7 @@ public IteratorOptions describeOptions() { "Classes implementing DocumentPermutation which can transform the document prior to evaluation (e.g. 
expand/mutate fields)."); options.put(LIMIT_FIELDS, "limit fields"); options.put(MATCHING_FIELD_SETS, "matching field sets (used along with limit fields)"); - options.put(GROUP_FIELDS, "group fields"); + options.put(GROUP_FIELDS, "group fields and fields to aggregate"); options.put(GROUP_FIELDS_BATCH_SIZE, "group fields.batch.size"); options.put(UNIQUE_FIELDS, "unique fields"); options.put(HIT_LIST, "hit list"); @@ -1479,10 +1480,7 @@ public boolean validateOptions(Map options) { } if (options.containsKey(GROUP_FIELDS)) { - String groupFields = options.get(GROUP_FIELDS); - for (String param : Splitter.on(',').omitEmptyStrings().trimResults().split(groupFields)) { - this.getGroupFields().add(param); - } + this.setGroupFields(GroupFields.from(options.get(GROUP_FIELDS))); } if (options.containsKey(GROUP_FIELDS_BATCH_SIZE)) { diff --git a/warehouse/query-core/src/main/java/datawave/query/jexl/functions/QueryFunctions.java b/warehouse/query-core/src/main/java/datawave/query/jexl/functions/QueryFunctions.java index 5e857e3d16..3834a8ab1f 100644 --- a/warehouse/query-core/src/main/java/datawave/query/jexl/functions/QueryFunctions.java +++ b/warehouse/query-core/src/main/java/datawave/query/jexl/functions/QueryFunctions.java @@ -4,7 +4,6 @@ import java.util.Collections; import java.util.Objects; import java.util.Set; -import java.util.function.Function; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.stream.StreamSupport; @@ -13,7 +12,6 @@ import org.apache.log4j.Logger; import datawave.data.type.util.NumericalEncoder; -import datawave.query.attributes.UniqueGranularity; import datawave.query.attributes.ValueTuple; import datawave.query.collections.FunctionalSet; import datawave.query.jexl.JexlPatternCache; @@ -35,6 +33,11 @@ public class QueryFunctions { public static final String MATCH_REGEX = "matchRegex"; public static final String INCLUDE_TEXT = "includeText"; public static final String NO_EXPANSION = "noExpansion"; + public static final String SUM = "sum"; + public static final String MAX = "max"; + public static final String MIN = "min"; + public static final String COUNT = "count"; + public static final String AVERAGE = "average"; protected static Logger log = Logger.getLogger(QueryFunctions.class); diff --git a/warehouse/query-core/src/main/java/datawave/query/jexl/functions/QueryFunctionsDescriptor.java b/warehouse/query-core/src/main/java/datawave/query/jexl/functions/QueryFunctionsDescriptor.java index bbf26a72b7..77f13b0e1d 100644 --- a/warehouse/query-core/src/main/java/datawave/query/jexl/functions/QueryFunctionsDescriptor.java +++ b/warehouse/query-core/src/main/java/datawave/query/jexl/functions/QueryFunctionsDescriptor.java @@ -207,6 +207,11 @@ private static void verify(String name, int numArgs) { case QueryFunctions.NO_EXPANSION: case QueryFunctions.LENIENT_FIELDS_FUNCTION: case QueryFunctions.STRICT_FIELDS_FUNCTION: + case QueryFunctions.SUM: + case QueryFunctions.COUNT: + case QueryFunctions.MIN: + case QueryFunctions.MAX: + case QueryFunctions.AVERAGE: if (numArgs == 0) { throw new IllegalArgumentException("Expected at least one argument to the " + name + " function"); } diff --git a/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/QueryOptionsFromQueryVisitor.java b/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/QueryOptionsFromQueryVisitor.java index 0f162cb35b..0dabefa649 100644 --- a/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/QueryOptionsFromQueryVisitor.java +++ 
b/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/QueryOptionsFromQueryVisitor.java @@ -58,7 +58,8 @@ public class QueryOptionsFromQueryVisitor extends RebuildingVisitor { UniqueFunction.UNIQUE_BY_MINUTE_FUNCTION, UniqueFunction.UNIQUE_BY_TENTH_OF_HOUR_FUNCTION, UniqueFunction.UNIQUE_BY_MONTH_FUNCTION, UniqueFunction.UNIQUE_BY_SECOND_FUNCTION, UniqueFunction.UNIQUE_BY_MILLISECOND_FUNCTION, UniqueFunction.UNIQUE_BY_YEAR_FUNCTION, QueryFunctions.GROUPBY_FUNCTION, QueryFunctions.EXCERPT_FIELDS_FUNCTION, QueryFunctions.NO_EXPANSION, - QueryFunctions.LENIENT_FIELDS_FUNCTION, QueryFunctions.STRICT_FIELDS_FUNCTION); + QueryFunctions.LENIENT_FIELDS_FUNCTION, QueryFunctions.STRICT_FIELDS_FUNCTION, QueryFunctions.SUM, QueryFunctions.MIN, QueryFunctions.MAX, + QueryFunctions.AVERAGE, QueryFunctions.COUNT); @SuppressWarnings("unchecked") public static T collect(T node, Object data) { @@ -262,6 +263,36 @@ private Object visit(ASTFunctionNode node, Map optionsMap) { updateFieldsOption(optionsMap, QueryParameters.STRICT_FIELDS, optionsList); return null; } + case QueryFunctions.SUM: { + List options = new ArrayList<>(); + this.visit(node, options); + optionsMap.put(QueryParameters.SUM_FIELDS, JOINER.join(options)); + return null; + } + case QueryFunctions.MAX: { + List options = new ArrayList<>(); + this.visit(node, options); + optionsMap.put(QueryParameters.MAX_FIELDS, JOINER.join(options)); + return null; + } + case QueryFunctions.MIN: { + List options = new ArrayList<>(); + this.visit(node, options); + optionsMap.put(QueryParameters.MIN_FIELDS, JOINER.join(options)); + return null; + } + case QueryFunctions.AVERAGE: { + List options = new ArrayList<>(); + this.visit(node, options); + optionsMap.put(QueryParameters.AVERAGE_FIELDS, JOINER.join(options)); + return null; + } + case QueryFunctions.COUNT: { + List options = new ArrayList<>(); + this.visit(node, options); + optionsMap.put(QueryParameters.COUNT_FIELDS, JOINER.join(options)); + return null; + } } } return super.visit(node, optionsMap); diff --git a/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/Average.java b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/Average.java new file mode 100644 index 0000000000..2fa955836f --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/Average.java @@ -0,0 +1,48 @@ +package datawave.query.language.functions.jexl; + +import java.text.MessageFormat; +import java.util.ArrayList; + +import datawave.query.jexl.functions.QueryFunctions; +import datawave.query.language.functions.QueryFunction; +import datawave.webservice.query.exception.BadRequestQueryException; +import datawave.webservice.query.exception.DatawaveErrorCode; + +public class Average extends JexlQueryFunction { + + public Average() { + super(QueryFunctions.AVERAGE, new ArrayList<>()); + } + + @Override + public void validate() throws IllegalArgumentException { + if (this.parameterList.isEmpty()) { + BadRequestQueryException qe = new BadRequestQueryException(DatawaveErrorCode.INVALID_FUNCTION_ARGUMENTS, MessageFormat.format("{0}", this.name)); + throw new IllegalArgumentException(qe); + } + } + + @Override + public QueryFunction duplicate() { + return new Average(); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + + sb.append(QueryFunctions.QUERY_FUNCTION_NAMESPACE).append(':').append(QueryFunctions.AVERAGE); + if (parameterList.isEmpty()) { + sb.append("()"); + } else { + char separator = '('; + for 
(String parm : parameterList) { + sb.append(separator).append(escapeString(parm)); + separator = ','; + } + sb.append(')'); + } + + return sb.toString(); + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/Count.java b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/Count.java new file mode 100644 index 0000000000..322faeaf01 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/Count.java @@ -0,0 +1,48 @@ +package datawave.query.language.functions.jexl; + +import java.text.MessageFormat; +import java.util.ArrayList; + +import datawave.query.jexl.functions.QueryFunctions; +import datawave.query.language.functions.QueryFunction; +import datawave.webservice.query.exception.BadRequestQueryException; +import datawave.webservice.query.exception.DatawaveErrorCode; + +public class Count extends JexlQueryFunction { + + public Count() { + super(QueryFunctions.COUNT, new ArrayList<>()); + } + + @Override + public void validate() throws IllegalArgumentException { + if (this.parameterList.isEmpty()) { + BadRequestQueryException qe = new BadRequestQueryException(DatawaveErrorCode.INVALID_FUNCTION_ARGUMENTS, MessageFormat.format("{0}", this.name)); + throw new IllegalArgumentException(qe); + } + } + + @Override + public QueryFunction duplicate() { + return new Count(); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + + sb.append(QueryFunctions.QUERY_FUNCTION_NAMESPACE).append(':').append(QueryFunctions.COUNT); + if (parameterList.isEmpty()) { + sb.append("()"); + } else { + char separator = '('; + for (String parm : parameterList) { + sb.append(separator).append(escapeString(parm)); + separator = ','; + } + sb.append(')'); + } + + return sb.toString(); + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/Max.java b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/Max.java new file mode 100644 index 0000000000..5698cdc3d7 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/Max.java @@ -0,0 +1,48 @@ +package datawave.query.language.functions.jexl; + +import java.text.MessageFormat; +import java.util.ArrayList; + +import datawave.query.jexl.functions.QueryFunctions; +import datawave.query.language.functions.QueryFunction; +import datawave.webservice.query.exception.BadRequestQueryException; +import datawave.webservice.query.exception.DatawaveErrorCode; + +public class Max extends JexlQueryFunction { + + public Max() { + super(QueryFunctions.MAX, new ArrayList<>()); + } + + @Override + public void validate() throws IllegalArgumentException { + if (this.parameterList.isEmpty()) { + BadRequestQueryException qe = new BadRequestQueryException(DatawaveErrorCode.INVALID_FUNCTION_ARGUMENTS, MessageFormat.format("{0}", this.name)); + throw new IllegalArgumentException(qe); + } + } + + @Override + public QueryFunction duplicate() { + return new Max(); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + + sb.append(QueryFunctions.QUERY_FUNCTION_NAMESPACE).append(':').append(QueryFunctions.MAX); + if (parameterList.isEmpty()) { + sb.append("()"); + } else { + char separator = '('; + for (String parm : parameterList) { + sb.append(separator).append(escapeString(parm)); + separator = ','; + } + sb.append(')'); + } + + return sb.toString(); + } +} diff --git 
a/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/Min.java b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/Min.java new file mode 100644 index 0000000000..821891bf3b --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/Min.java @@ -0,0 +1,48 @@ +package datawave.query.language.functions.jexl; + +import java.text.MessageFormat; +import java.util.ArrayList; + +import datawave.query.jexl.functions.QueryFunctions; +import datawave.query.language.functions.QueryFunction; +import datawave.webservice.query.exception.BadRequestQueryException; +import datawave.webservice.query.exception.DatawaveErrorCode; + +public class Min extends JexlQueryFunction { + + public Min() { + super(QueryFunctions.MIN, new ArrayList<>()); + } + + @Override + public void validate() throws IllegalArgumentException { + if (this.parameterList.isEmpty()) { + BadRequestQueryException qe = new BadRequestQueryException(DatawaveErrorCode.INVALID_FUNCTION_ARGUMENTS, MessageFormat.format("{0}", this.name)); + throw new IllegalArgumentException(qe); + } + } + + @Override + public QueryFunction duplicate() { + return new Min(); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + + sb.append(QueryFunctions.QUERY_FUNCTION_NAMESPACE).append(':').append(QueryFunctions.MIN); + if (parameterList.isEmpty()) { + sb.append("()"); + } else { + char separator = '('; + for (String parm : parameterList) { + sb.append(separator).append(escapeString(parm)); + separator = ','; + } + sb.append(')'); + } + + return sb.toString(); + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/Sum.java b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/Sum.java new file mode 100644 index 0000000000..b0c7ee955c --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/Sum.java @@ -0,0 +1,48 @@ +package datawave.query.language.functions.jexl; + +import java.text.MessageFormat; +import java.util.ArrayList; + +import datawave.query.jexl.functions.QueryFunctions; +import datawave.query.language.functions.QueryFunction; +import datawave.webservice.query.exception.BadRequestQueryException; +import datawave.webservice.query.exception.DatawaveErrorCode; + +public class Sum extends JexlQueryFunction { + + public Sum() { + super(QueryFunctions.SUM, new ArrayList<>()); + } + + @Override + public void validate() throws IllegalArgumentException { + if (this.parameterList.isEmpty()) { + BadRequestQueryException qe = new BadRequestQueryException(DatawaveErrorCode.INVALID_FUNCTION_ARGUMENTS, MessageFormat.format("{0}", this.name)); + throw new IllegalArgumentException(qe); + } + } + + @Override + public QueryFunction duplicate() { + return new Sum(); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + + sb.append(QueryFunctions.QUERY_FUNCTION_NAMESPACE).append(':').append(QueryFunctions.SUM); + if (parameterList.isEmpty()) { + sb.append("()"); + } else { + char separator = '('; + for (String parm : parameterList) { + sb.append(separator).append(escapeString(parm)); + separator = ','; + } + sb.append(')'); + } + + return sb.toString(); + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java b/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java index cfda8f2062..b7b30a9d1d 100644 --- 
a/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java +++ b/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java @@ -65,6 +65,7 @@ import datawave.query.QueryParameters; import datawave.query.attributes.ExcerptFields; import datawave.query.attributes.UniqueFields; +import datawave.query.common.grouping.GroupFields; import datawave.query.composite.CompositeMetadata; import datawave.query.composite.CompositeUtils; import datawave.query.config.ShardQueryConfiguration; @@ -523,9 +524,9 @@ private void configureIterator(ShardQueryConfiguration config, IteratorSetting c addOption(cfg, QueryOptions.LIMIT_FIELDS, config.getLimitFieldsAsString(), false); addOption(cfg, QueryOptions.MATCHING_FIELD_SETS, config.getMatchingFieldSetsAsString(), false); - addOption(cfg, QueryOptions.GROUP_FIELDS, config.getGroupFieldsAsString(), false); - addOption(cfg, QueryOptions.GROUP_FIELDS_BATCH_SIZE, config.getGroupFieldsBatchSizeAsString(), false); - addOption(cfg, QueryOptions.UNIQUE_FIELDS, config.getUniqueFields().toString(), false); + addOption(cfg, QueryOptions.GROUP_FIELDS, config.getGroupFields().toString(), true); + addOption(cfg, QueryOptions.GROUP_FIELDS_BATCH_SIZE, config.getGroupFieldsBatchSizeAsString(), true); + addOption(cfg, QueryOptions.UNIQUE_FIELDS, config.getUniqueFields().toString(), true); addOption(cfg, QueryOptions.HIT_LIST, Boolean.toString(config.isHitList()), false); addOption(cfg, QueryOptions.TERM_FREQUENCY_FIELDS, Joiner.on(',').join(config.getQueryTermFrequencyFields()), false); addOption(cfg, QueryOptions.TERM_FREQUENCIES_REQUIRED, Boolean.toString(config.isTermFrequenciesRequired()), false); @@ -548,7 +549,7 @@ private void configureIterator(ShardQueryConfiguration config, IteratorSetting c */ private void configureExcerpts(ShardQueryConfiguration config, IteratorSetting cfg) { if (config.isTermFrequenciesRequired()) { - addOption(cfg, QueryOptions.EXCERPT_FIELDS, config.getExcerptFields().toString(), false); + addOption(cfg, QueryOptions.EXCERPT_FIELDS, config.getExcerptFields().toString(), true); addOption(cfg, QueryOptions.EXCERPT_ITERATOR, config.getExcerptIterator().getName(), false); } } @@ -1711,8 +1712,7 @@ protected ASTJexlScript applyQueryModel(MetadataHelper metadataHelper, ShardQuer Multimap inverseReverseModel = invertMultimap(queryModel.getReverseQueryMapping()); inverseReverseModel.putAll(queryModel.getForwardQueryMapping()); - Collection projectFields = config.getProjectFields(), blacklistedFields = config.getBlacklistedFields(), limitFields = config.getLimitFields(), - groupFields = config.getGroupFields(); + Collection projectFields = config.getProjectFields(), blacklistedFields = config.getBlacklistedFields(), limitFields = config.getLimitFields(); if (projectFields != null && !projectFields.isEmpty()) { projectFields = queryModel.remapParameter(projectFields, inverseReverseModel); @@ -1722,14 +1722,16 @@ protected ASTJexlScript applyQueryModel(MetadataHelper metadataHelper, ShardQuer config.setProjectFields(Sets.newHashSet(projectFields)); } - if (groupFields != null && !groupFields.isEmpty()) { - Collection remappedGroupFields = queryModel.remapParameter(groupFields, inverseReverseModel); + GroupFields groupFields = config.getGroupFields(); + if (groupFields != null && groupFields.hasGroupByFields()) { + groupFields.remapFields(inverseReverseModel, queryModel.getReverseQueryMapping()); if (log.isTraceEnabled()) { - log.trace("Updated grouping set using query model to: " + remappedGroupFields); + 
log.trace("Updating group-by fields using query model to " + groupFields); } - config.setGroupFields(Sets.newHashSet(remappedGroupFields)); - // if grouping is set, also set the projection to be the same - config.setProjectFields(Sets.newHashSet(remappedGroupFields)); + config.setGroupFields(groupFields); + + // If grouping is set, we must make the projection fields match all the group-by fields and aggregation fields. + config.setProjectFields(groupFields.getProjectionFields()); } UniqueFields uniqueFields = config.getUniqueFields(); @@ -2435,7 +2437,7 @@ protected void setCommonIteratorOptions(ShardQueryConfiguration config, Iterator } // if groupby function is used, force include.grouping.context to be true - if (config.getGroupFields() != null && !config.getGroupFields().isEmpty()) { + if (config.getGroupFields() != null && config.getGroupFields().hasGroupByFields()) { addOption(cfg, QueryOptions.INCLUDE_GROUPING_CONTEXT, Boolean.toString(true), false); } else { addOption(cfg, QueryOptions.INCLUDE_GROUPING_CONTEXT, Boolean.toString(config.getIncludeGroupingContext()), false); diff --git a/warehouse/query-core/src/main/java/datawave/query/planner/QueryOptionsSwitch.java b/warehouse/query-core/src/main/java/datawave/query/planner/QueryOptionsSwitch.java index 2b47f226da..0bde1ef084 100644 --- a/warehouse/query-core/src/main/java/datawave/query/planner/QueryOptionsSwitch.java +++ b/warehouse/query-core/src/main/java/datawave/query/planner/QueryOptionsSwitch.java @@ -12,6 +12,7 @@ import datawave.query.QueryParameters; import datawave.query.attributes.ExcerptFields; import datawave.query.attributes.UniqueFields; +import datawave.query.common.grouping.GroupFields; import datawave.query.config.ShardQueryConfiguration; import datawave.util.StringUtils; import datawave.webservice.common.logging.ThreadConfigurableLogger; @@ -21,6 +22,7 @@ public class QueryOptionsSwitch { private static final Logger log = ThreadConfigurableLogger.getLogger(QueryOptionsSwitch.class); public static void apply(Map optionsMap, ShardQueryConfiguration config) { + GroupFields groupFields; for (Map.Entry entry : optionsMap.entrySet()) { String key = entry.getKey(); String value = entry.getValue(); @@ -41,8 +43,13 @@ public static void apply(Map optionsMap, ShardQueryConfiguration break; case QueryParameters.GROUP_FIELDS: String[] groups = StringUtils.split(value, Constants.PARAM_VALUE_SEP); - config.setGroupFields(Sets.newHashSet(groups)); - config.setProjectFields(Sets.newHashSet(groups)); + groupFields = config.getGroupFields(); + groupFields.setGroupByFields(Sets.newHashSet(groups)); + config.setGroupFields(groupFields); + // If there are any group-by fields, update the projection fields to include them. + if (groupFields.hasGroupByFields()) { + config.setProjectFields(groupFields.getProjectionFields()); + } break; case QueryParameters.GROUP_FIELDS_BATCH_SIZE: try { @@ -68,6 +75,56 @@ public static void apply(Map optionsMap, ShardQueryConfiguration case QueryParameters.STRICT_FIELDS: config.setStrictFields(new HashSet<>(Arrays.asList(StringUtils.split(value, ',')))); break; + case QueryParameters.SUM_FIELDS: + String[] sumFields = StringUtils.split(value, Constants.PARAM_VALUE_SEP); + groupFields = config.getGroupFields(); + groupFields.setSumFields(Sets.newHashSet(sumFields)); + config.setGroupFields(groupFields); + // Update the projection fields only if we have group-by fields specified. 
+ if (groupFields.hasGroupByFields()) { + config.setProjectFields(groupFields.getProjectionFields()); + } + break; + case QueryParameters.MAX_FIELDS: + String[] maxFields = StringUtils.split(value, Constants.PARAM_VALUE_SEP); + groupFields = config.getGroupFields(); + groupFields.setMaxFields(Sets.newHashSet(maxFields)); + config.setGroupFields(groupFields); + // Update the projection fields only if we have group-by fields specified. + if (groupFields.hasGroupByFields()) { + config.setProjectFields(groupFields.getProjectionFields()); + } + break; + case QueryParameters.MIN_FIELDS: + String[] minFields = StringUtils.split(value, Constants.PARAM_VALUE_SEP); + groupFields = config.getGroupFields(); + groupFields.setMinFields(Sets.newHashSet(minFields)); + config.setGroupFields(groupFields); + // Update the projection fields only if we have group-by fields specified. + if (groupFields.hasGroupByFields()) { + config.setProjectFields(groupFields.getProjectionFields()); + } + break; + case QueryParameters.COUNT_FIELDS: + String[] countFields = StringUtils.split(value, Constants.PARAM_VALUE_SEP); + groupFields = config.getGroupFields(); + groupFields.setCountFields(Sets.newHashSet(countFields)); + config.setGroupFields(groupFields); + // Update the projection fields only if we have group-by fields specified. + if (groupFields.hasGroupByFields()) { + config.setProjectFields(groupFields.getProjectionFields()); + } + break; + case QueryParameters.AVERAGE_FIELDS: + String[] averageFields = StringUtils.split(value, Constants.PARAM_VALUE_SEP); + groupFields = config.getGroupFields(); + groupFields.setAverageFields(Sets.newHashSet(averageFields)); + config.setGroupFields(groupFields); + // Update the projection fields only if we have group-by fields specified. + if (groupFields.hasGroupByFields()) { + config.setProjectFields(groupFields.getProjectionFields()); + } + break; } } } diff --git a/warehouse/query-core/src/main/java/datawave/query/tables/ShardQueryLogic.java b/warehouse/query-core/src/main/java/datawave/query/tables/ShardQueryLogic.java index f39c2af0f4..d0836c9743 100644 --- a/warehouse/query-core/src/main/java/datawave/query/tables/ShardQueryLogic.java +++ b/warehouse/query-core/src/main/java/datawave/query/tables/ShardQueryLogic.java @@ -48,6 +48,7 @@ import datawave.query.attributes.ExcerptFields; import datawave.query.attributes.UniqueFields; import datawave.query.cardinality.CardinalityConfiguration; +import datawave.query.common.grouping.GroupFields; import datawave.query.config.IndexHole; import datawave.query.config.Profile; import datawave.query.config.ShardQueryConfiguration; @@ -612,7 +613,7 @@ public QueryLogicTransformer getTransformer(Query settings) { } public boolean isLongRunningQuery() { - return !getConfig().getGroupFields().isEmpty(); + return getConfig().getGroupFields().hasGroupByFields(); } /** @@ -632,13 +633,14 @@ private void addConfigBasedTransformers() { } } - if (getConfig().getGroupFields() != null && !getConfig().getGroupFields().isEmpty()) { + GroupFields groupFields = getGroupByFields(); + if (groupFields != null && groupFields.hasGroupByFields()) { DocumentTransform alreadyExists = ((DocumentTransformer) this.transformerInstance).containsTransform(GroupingTransform.class); if (alreadyExists != null) { - ((GroupingTransform) alreadyExists).updateConfig(getConfig().getGroupFields(), getQueryModel()); + ((GroupingTransform) alreadyExists).updateConfig(groupFields); } else { - ((DocumentTransformer) this.transformerInstance).addTransform(new 
GroupingTransform(getQueryModel(), getConfig().getGroupFields(), - this.markingFunctions, this.getQueryExecutionForPageTimeout())); + ((DocumentTransformer) this.transformerInstance) + .addTransform(new GroupingTransform(groupFields, this.markingFunctions, this.getQueryExecutionForPageTimeout())); } } } @@ -790,15 +792,49 @@ protected void loadQueryParameters(ShardQueryConfiguration config, Query setting } // Get the GROUP_FIELDS parameter if given - String groupFields = settings.findParameter(QueryParameters.GROUP_FIELDS).getParameterValue().trim(); - if (StringUtils.isNotBlank(groupFields)) { - List groupFieldsList = Arrays.asList(StringUtils.split(groupFields, Constants.PARAM_VALUE_SEP)); - - // Only set the group fields if we were actually given some - if (!groupFieldsList.isEmpty()) { - this.setGroupFields(new HashSet<>(groupFieldsList)); - config.setGroupFields(new HashSet<>(groupFieldsList)); - config.setProjectFields(new HashSet<>(groupFieldsList)); + String groupFieldsParam = settings.findParameter(QueryParameters.GROUP_FIELDS).getParameterValue().trim(); + if (StringUtils.isNotBlank(groupFieldsParam)) { + String[] groupFields = StringUtils.split(groupFieldsParam, Constants.PARAM_VALUE_SEP); + + // Only set the group fields if we were actually given some. + if (groupFields.length > 0) { + GroupFields groupByFields = config.getGroupFields(); + groupByFields.setGroupByFields(Sets.newHashSet(groupFields)); + + // Update the sum fields if given. + String sumFieldsParam = settings.findParameter(QueryParameters.SUM_FIELDS).getParameterValue().trim(); + if (StringUtils.isNotBlank(sumFieldsParam)) { + groupByFields.setSumFields(Sets.newHashSet(StringUtils.split(sumFieldsParam, Constants.PARAM_VALUE_SEP))); + } + + // Update the count fields if given. + String countFieldsParam = settings.findParameter(QueryParameters.COUNT_FIELDS).getParameterValue().trim(); + if (StringUtils.isNotBlank(countFieldsParam)) { + groupByFields.setCountFields(Sets.newHashSet(StringUtils.split(countFieldsParam, Constants.PARAM_VALUE_SEP))); + } + + // Update the average fields if given. + String averageFieldsParam = settings.findParameter(QueryParameters.AVERAGE_FIELDS).getParameterValue().trim(); + if (StringUtils.isNotBlank(averageFieldsParam)) { + groupByFields.setAverageFields(Sets.newHashSet(StringUtils.split(averageFieldsParam, Constants.PARAM_VALUE_SEP))); + } + + // Update the min fields if given. + String minFieldsParam = settings.findParameter(QueryParameters.MIN_FIELDS).getParameterValue().trim(); + if (StringUtils.isNotBlank(minFieldsParam)) { + groupByFields.setMinFields(Sets.newHashSet(StringUtils.split(minFieldsParam, Constants.PARAM_VALUE_SEP))); + } + + // Update the max fields if given. + String maxFieldsParam = settings.findParameter(QueryParameters.MAX_FIELDS).getParameterValue().trim(); + if (StringUtils.isNotBlank(maxFieldsParam)) { + groupByFields.setMaxFields(Sets.newHashSet(StringUtils.split(maxFieldsParam, Constants.PARAM_VALUE_SEP))); + } + + // Update the config and the projection fields.
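To make the parameter wiring concrete: the group-by and aggregation fields can reach this logic either directly as query parameters or via the new JEXL functions. For example (the field names are hypothetical, and the f: namespace and unquoted argument style are assumed to follow the existing DataWave query-function conventions rather than being defined by this patch), a query such as

    AGE > 18 && f:groupby(AGE,GENDER) && f:sum(SALARY) && f:average(AGE)

is rewritten by QueryOptionsFromQueryVisitor into the GROUP_FIELDS, SUM_FIELDS and AVERAGE_FIELDS parameters, while a client may also supply those same parameters on the query settings, which is what the block above parses into a single GroupFields instance before the config and projection fields are updated below.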
+ this.setGroupByFields(groupByFields); + config.setGroupFields(groupByFields); + config.setProjectFields(groupByFields.getProjectionFields()); } } @@ -1307,11 +1343,11 @@ public void setLimitFieldsField(String limitFieldsField) { getConfig().setLimitFieldsField(limitFieldsField); } - public Set getGroupFields() { + public GroupFields getGroupByFields() { return getConfig().getGroupFields(); } - public void setGroupFields(Set groupFields) { + public void setGroupByFields(GroupFields groupFields) { getConfig().setGroupFields(groupFields); } @@ -2100,6 +2136,11 @@ public Set getOptionalQueryParameters() { optionalParams.add(datawave.webservice.query.QueryParameters.QUERY_PAGETIMEOUT); optionalParams.add(datawave.webservice.query.QueryParameters.QUERY_EXPIRATION); optionalParams.add(datawave.webservice.query.QueryParameters.QUERY_MAX_RESULTS_OVERRIDE); + optionalParams.add(QueryParameters.SUM_FIELDS); + optionalParams.add(QueryParameters.MAX_FIELDS); + optionalParams.add(QueryParameters.MIN_FIELDS); + optionalParams.add(QueryParameters.COUNT_FIELDS); + optionalParams.add(QueryParameters.AVERAGE_FIELDS); return optionalParams; } diff --git a/warehouse/query-core/src/main/java/datawave/query/transformer/GroupingTransform.java b/warehouse/query-core/src/main/java/datawave/query/transformer/GroupingTransform.java index 1fe8ea283b..8327188210 100644 --- a/warehouse/query-core/src/main/java/datawave/query/transformer/GroupingTransform.java +++ b/warehouse/query-core/src/main/java/datawave/query/transformer/GroupingTransform.java @@ -2,36 +2,27 @@ import static org.slf4j.LoggerFactory.getLogger; -import java.math.BigDecimal; import java.util.ArrayList; -import java.util.Collection; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Map.Entry; -import java.util.Set; -import java.util.stream.Collectors; import javax.annotation.Nullable; import org.apache.accumulo.core.data.Key; -import org.apache.accumulo.core.security.ColumnVisibility; import org.slf4j.Logger; -import org.springframework.util.Assert; -import com.google.common.collect.HashMultimap; import com.google.common.collect.Maps; -import com.google.common.collect.Multimap; -import datawave.data.type.NumberType; import datawave.marking.MarkingFunctions; import datawave.query.attributes.Document; -import datawave.query.attributes.TypeAttribute; -import datawave.query.common.grouping.GroupingUtil; -import datawave.query.common.grouping.GroupingUtil.GroupCountingHashMap; -import datawave.query.common.grouping.GroupingUtil.GroupingTypeAttribute; +import datawave.query.common.grouping.DocumentGrouper; +import datawave.query.common.grouping.Group; +import datawave.query.common.grouping.GroupFields; +import datawave.query.common.grouping.GroupingUtils; +import datawave.query.common.grouping.Groups; import datawave.query.iterator.profile.FinalDocumentTrackingIterator; -import datawave.query.jexl.JexlASTHelper; import datawave.query.model.QueryModel; /** @@ -47,34 +38,15 @@ public class GroupingTransform extends DocumentTransform.DefaultDocumentTransfor /** * the fields (user provided) to group by */ - private Set groupFieldsSet; + private GroupFields groupFields; - /** - * holds the aggregated column visibilities for each grouped event - */ - private final Multimap>,ColumnVisibility> fieldVisibilities = HashMultimap.create(); - - /** - * A map of TypeAttribute collection keys to integer counts This map uses a special key type that ignores the metadata (with visibilities) in its hashCode - * and equals methods - */ 
- private GroupCountingHashMap countingMap; - - /** - * Provides the grouping information (counting map, field visibilities, etc) for grouping documents. - */ - private final GroupingUtil groupingUtil = new GroupingUtil(); + private final Groups groups; /** * list of documents to return, created from the countingMap */ private final LinkedList documents = new LinkedList<>(); - /** - * mapping used to combine field names that map to different model names - */ - private Map reverseModelMapping = null; - /** * list of keys that have been read, in order to keep track of where we left off when a new iterator is created */ @@ -89,28 +61,22 @@ public class GroupingTransform extends DocumentTransform.DefaultDocumentTransfor /** * Constructor * - * @param model - * the query model (can be null) - * @param groupFieldsSet - * the fields (user provided) to group by + * @param groupFields + * the fields (user provided) to group by and aggregate * @param queryExecutionForPageTimeout * how long (in milliseconds) to let a page of results to collect before signaling to return a blank page to the client * @param markingFunctions * the marking functions */ - public GroupingTransform(QueryModel model, Collection groupFieldsSet, MarkingFunctions markingFunctions, long queryExecutionForPageTimeout) { + public GroupingTransform(GroupFields groupFields, MarkingFunctions markingFunctions, long queryExecutionForPageTimeout) { super.initialize(settings, markingFunctions); this.queryExecutionForPageTimeout = queryExecutionForPageTimeout; - this.countingMap = new GroupCountingHashMap(markingFunctions); - updateConfig(groupFieldsSet, model); - log.trace("groupFieldsSet: {}", this.groupFieldsSet); + this.groups = new Groups(); + this.groupFields = groupFields; } - public void updateConfig(Collection groupFieldSet, QueryModel model) { - this.groupFieldsSet = groupFieldSet.stream().map(JexlASTHelper::deconstructIdentifier).collect(Collectors.toSet()); - if (model != null) { - reverseModelMapping = model.getReverseQueryMapping(); - } + public void updateConfig(GroupFields groupFields) { + this.groupFields = groupFields; } @Nullable @@ -127,9 +93,7 @@ public Entry apply(@Nullable Entry keyDocumentEntry) keys.add(keyDocumentEntry.getKey()); log.trace("{} get list key counts for: {}", "web-server", keyDocumentEntry); - GroupingUtil.GroupingInfo groupingInfo = groupingUtil.getGroupingInfo(keyDocumentEntry, groupFieldsSet, countingMap, reverseModelMapping); - this.countingMap = groupingInfo.getCountsMap(); - this.fieldVisibilities.putAll(groupingInfo.getFieldVisibilities()); + DocumentGrouper.group(keyDocumentEntry, groupFields, groups); } long elapsedExecutionTimeForCurrentPage = System.currentTimeMillis() - this.queryExecutionForPageStartTime; @@ -153,31 +117,9 @@ public void setQueryExecutionForPageStartTime(long queryExecutionForPageStartTim @Override public Entry flush() { Document document = null; - if (!countingMap.isEmpty()) { - - log.trace("flush will use the countingMap: {}", countingMap); - - for (Collection> entry : countingMap.keySet()) { - log.trace("from countingMap, got entry: {}", entry); - ColumnVisibility columnVisibility; - try { - columnVisibility = groupingUtil.combine(fieldVisibilities.get(entry), markingFunctions); - } catch (Exception e) { - throw new IllegalStateException("Unable to merge column visibilities: " + fieldVisibilities.get(entry), e); - } - // grab a key from those saved during getListKeyCounts - Assert.notEmpty(keys, "no available keys for grouping results"); - // use the last (most 
recent) key so a new iterator will know where to start - Key docKey = keys.get(keys.size() - 1); - Document d = new Document(docKey, true); - d.setColumnVisibility(columnVisibility); - - entry.forEach(base -> d.put(base.getMetadata().getRow().toString(), base)); - NumberType type = new NumberType(); - type.setDelegate(new BigDecimal(countingMap.get(entry))); - TypeAttribute attr = new TypeAttribute<>(type, new Key("count"), true); - d.put("COUNT", attr); - documents.add(d); + if (!groups.isEmpty()) { + for (Group group : groups.getGroups()) { + documents.add(GroupingUtils.createDocument(group, keys, markingFunctions, GroupingUtils.AverageAggregatorWriteFormat.AVERAGE)); } } @@ -190,11 +132,10 @@ public Entry flush() { Key key = document.getMetadata(); Entry entry = Maps.immutableEntry(key, document); log.trace("flushing out {}", entry); - countingMap.clear(); + groups.clear(); return entry; } return null; } - } diff --git a/warehouse/query-core/src/test/java/datawave/query/common/grouping/AverageAggregatorTest.java b/warehouse/query-core/src/test/java/datawave/query/common/grouping/AverageAggregatorTest.java new file mode 100644 index 0000000000..e1353fc970 --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/query/common/grouping/AverageAggregatorTest.java @@ -0,0 +1,73 @@ +package datawave.query.common.grouping; + +import static org.junit.Assert.assertEquals; + +import java.math.BigDecimal; + +import org.apache.accumulo.core.data.Key; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import datawave.data.type.NumberType; +import datawave.data.type.Type; +import datawave.query.attributes.Content; +import datawave.query.attributes.Numeric; +import datawave.query.attributes.TypeAttribute; + +/** + * Tests for {@link AverageAggregator}. + */ +public class AverageAggregatorTest { + + private AverageAggregator aggregator; + + @Before + public void setUp() throws Exception { + aggregator = new AverageAggregator("FIELD"); + } + + /** + * Verify the initial average is 0. + */ + @Test + public void testInitialAverage() { + assertAverage(null); + } + + /** + * Verify that if given a non-numeric value, that an exception is thrown. + */ + @Test + public void testNonNumericValue() { + Content content = new Content("i am content", new Key(), true); + + IllegalArgumentException exception = Assert.assertThrows(IllegalArgumentException.class, () -> aggregator.aggregate(content)); + assertEquals("Unable to calculate an average with non-numerical value 'i am content'", exception.getMessage()); + } + + /** + * Verify that given additional numeric values, that the averages are correctly calculated. 
+ */ + @Test + public void testAggregation() { + aggregator.aggregate(createNumeric("4")); + assertAverage(new BigDecimal("4")); + + aggregator.aggregate(createNumeric("1")); + aggregator.aggregate(createNumeric("1")); // Sum 6, count 3 + assertAverage(new BigDecimal("2")); + + aggregator.aggregate(createNumeric("4.5")); + assertAverage(new BigDecimal("2.625")); + } + + private TypeAttribute createNumeric(String number) { + Type type = new NumberType(number); + return new TypeAttribute<>(type, new Key(), true); + } + + private void assertAverage(BigDecimal average) { + assertEquals(average, aggregator.getAggregation()); + } +} diff --git a/warehouse/query-core/src/test/java/datawave/query/common/grouping/CountAggregatorTest.java b/warehouse/query-core/src/test/java/datawave/query/common/grouping/CountAggregatorTest.java new file mode 100644 index 0000000000..0ced68af24 --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/query/common/grouping/CountAggregatorTest.java @@ -0,0 +1,52 @@ +package datawave.query.common.grouping; + +import static org.junit.Assert.assertEquals; + +import org.apache.accumulo.core.data.Key; +import org.junit.Before; +import org.junit.Test; + +import datawave.query.attributes.Content; + +/** + * Tests for {@link CountAggregator}. + */ +public class CountAggregatorTest { + + private final Content value = new Content("i am content", new Key(), true); + private CountAggregator aggregator; + + @Before + public void setUp() throws Exception { + aggregator = new CountAggregator("FIELD"); + } + + /** + * Verify that the initial count is 0. + */ + @Test + public void testInitialCount() { + assertCount(0L); + } + + /** + * Verify that when a number of values are aggregated, that the count is increased by the number of values aggregated. 
+ */ + @Test + public void testIncrementingCount() { + aggregator.aggregate(value); + assertCount(1L); + + aggregator.aggregate(value); + assertCount(2L); + + aggregator.aggregate(value); + aggregator.aggregate(value); + assertCount(4L); + } + + private void assertCount(Long count) { + assertEquals(count, aggregator.getAggregation()); + } + +} diff --git a/warehouse/query-core/src/test/java/datawave/query/common/grouping/DocumentGrouperTest.java b/warehouse/query-core/src/test/java/datawave/query/common/grouping/DocumentGrouperTest.java new file mode 100644 index 0000000000..7ec1b589cd --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/query/common/grouping/DocumentGrouperTest.java @@ -0,0 +1,1100 @@ +package datawave.query.common.grouping; + +import java.math.BigDecimal; +import java.util.AbstractMap; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.accumulo.core.data.Key; +import org.apache.accumulo.core.security.ColumnVisibility; +import org.assertj.core.util.Sets; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +import com.google.common.collect.HashMultimap; +import com.google.common.collect.Multimap; + +import datawave.data.type.LcNoDiacriticsType; +import datawave.data.type.NumberType; +import datawave.data.type.Type; +import datawave.query.attributes.Attribute; +import datawave.query.attributes.Attributes; +import datawave.query.attributes.Document; +import datawave.query.attributes.TypeAttribute; +import datawave.test.GroupsAssert; + +public class DocumentGrouperTest { + + private static final ColumnVisibility COLVIS_ALL = new ColumnVisibility("ALL"); + private static final ColumnVisibility COLVIS_E = new ColumnVisibility("E"); + private static final ColumnVisibility COLVIS_I = new ColumnVisibility("I"); + private static final ColumnVisibility COLVIS_ALL_E_I = new ColumnVisibility("ALL&E&I"); + private static final Key key = new Key("test_key"); + private static final Multimap inverseReverseMap = HashMultimap.create(); + private static final Map reverseMap = new HashMap<>(); + + private GroupFields groupFields = new GroupFields(); + private Document document; + private Groups groups; + + @BeforeClass + public static void beforeClass() { + inverseReverseMap.put("GEN", "GENERE"); + inverseReverseMap.put("GEN", "GENDER"); + inverseReverseMap.put("AG", "AGE"); + inverseReverseMap.put("AG", "ETA"); + inverseReverseMap.put("LOC", "BUILDING"); + inverseReverseMap.put("LOC", "LOCATION"); + inverseReverseMap.put("PEAK", "HEIGHT"); + + reverseMap.put("GENERE", "GEN"); + reverseMap.put("GENDER", "GEN"); + reverseMap.put("AGE", "AG"); + reverseMap.put("ETA", "AG"); + reverseMap.put("BUILDING", "LOC"); + reverseMap.put("LOCATION", "LOC"); + reverseMap.put("HEIGHT", "PEAK"); + } + + @Before + public void setUp() throws Exception { + groups = new Groups(); + document = new Document(); + groupFields = new GroupFields(); + } + + /** + * Verify that when grouping by a single field that has a grouping context and instance, e.g. the format GENDER.FOO.1, that the count is correctly + * established for each distinct value of the field. 
+ */ + @Test + public void testGroupingBySingleFieldWithGroupAndInstance() { + givenGroupFields("GENDER"); + + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.1").withLcNoDiacritics("MALE")); + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.2").withLcNoDiacritics("FEMALE")); + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.3").withLcNoDiacritics("FEMALE")); + givenDocumentEntry(DocumentEntry.of("GENDER.BAR.1").withLcNoDiacritics("MALE")); + givenDocumentEntry(DocumentEntry.of("GENDER.BAR.2").withLcNoDiacritics("FEMALE")); + + executeGrouping(); + + // We should have the following groupings: + // MALE (Count of 2) + // FEMALE (Count of 3) + GroupsAssert groupsAssert = GroupsAssert.assertThat(groups); + groupsAssert.hasTotalGroups(2); + groupsAssert.assertGroup(textKey("GENDER", "MALE")).hasCount(2); + groupsAssert.assertGroup(textKey("GENDER", "FEMALE")).hasCount(3); + } + + /** + * Verify that when grouping by multiple fields, where all entries have a grouping context and instance, and only direct matches need to be grouped, that + * the count is correct for each grouping. Additionally, verify that the grouping context and instance are parsed correctly from the field names. + */ + @Test + public void testGroupingFieldsWithMatchingGroupsAndInstancesAndDirectMatches() { + givenGroupFields("AGE", "GENDER"); + + givenDocumentEntry(DocumentEntry.of("AGE.FOO.A.B.C.1").withNumberType("24")); + givenDocumentEntry(DocumentEntry.of("AGE.FOO.A.B.2").withNumberType("20")); + givenDocumentEntry(DocumentEntry.of("AGE.FOO.C.3").withNumberType("20")); + givenDocumentEntry(DocumentEntry.of("AGE.BAR.B.C.1").withNumberType("40")); + givenDocumentEntry(DocumentEntry.of("AGE.BAR.V.A.2").withNumberType("20")); + + // Direct match to AGE.FOO.1. + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.A.C.1").withLcNoDiacritics("MALE")); + // Direct match to AGE.FOO.2 + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.V.S.2").withLcNoDiacritics("FEMALE")); + // Direct match to AGE.FOO.3 + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.F.3").withLcNoDiacritics("FEMALE")); + // No direct match with an AGE record, should be ignored since we have a direct match for a GENDER entry elsewhere. + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.F.G.4").withLcNoDiacritics("FEMALE")); + // Direct match to AGE.BAR.1. + givenDocumentEntry(DocumentEntry.of("GENDER.BAR.V.C.A.1").withLcNoDiacritics("MALE")); + // Direct match to AGE.BAR.2. + givenDocumentEntry(DocumentEntry.of("GENDER.BAR.G.S.2").withLcNoDiacritics("FEMALE")); + + executeGrouping(); + + // We should end up with the following groupings: + // 24-MALE (Count of 1) + // 20-FEMALE (Count of 3) + // 40-MALE (Count of 1) + GroupsAssert groupsAssert = GroupsAssert.assertThat(groups); + groupsAssert.hasTotalGroups(3); + groupsAssert.assertGroup(textKey("GENDER", "MALE"), numericKey("AGE", "24")).hasCount(1); + groupsAssert.assertGroup(textKey("GENDER", "FEMALE"), numericKey("AGE", "20")).hasCount(3); + groupsAssert.assertGroup(textKey("GENDER", "MALE"), numericKey("AGE", "40")).hasCount(1); + } + + /** + * Verify that when grouping by multiple fields with grouping contexts and instances, but no direct matches, that groupings are created that consist of each + * field being intersected with each other. 
+ */ + @Test + public void testGroupingFieldsWithMatchingGroupsAndInstances() { + givenGroupFields("AGE", "GENDER", "BUILDING"); + + givenDocumentEntry(DocumentEntry.of("AGE.FOO.1").withNumberType("24")); + givenDocumentEntry(DocumentEntry.of("AGE.FOO.2").withNumberType("20")); + givenDocumentEntry(DocumentEntry.of("AGE.FOO.3").withNumberType("20")); + + // Direct match to AGE.FOO.1. + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.1").withLcNoDiacritics("MALE")); + // Direct match to AGE.FOO.2 + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.2").withLcNoDiacritics("FEMALE")); + // Direct match to AGE.FOO.3 + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.3").withLcNoDiacritics("FEMALE")); + // No direct match with an AGE record, should be ignored since we have a direct match for a GENDER entry elsewhere. + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.4").withLcNoDiacritics("FEMALE")); + // No direct match, but we should have a cartesian product with the AGE-GENDER direct matches. + givenDocumentEntry(DocumentEntry.of("BUILDING.BAR.1").withLcNoDiacritics("West")); + // No direct match, but we should have a cartesian product with the AGE-GENDER direct matches. + givenDocumentEntry(DocumentEntry.of("BUILDING.BAR.2").withLcNoDiacritics("East")); + + executeGrouping(); + + // We should end up with the following groupings: + // 24-MALE-West (Count of 1) + // 24-MALE-East (Count of 1) + // 20-FEMALE-West (Count of 2) + // 20-FEMALE-East (Count of 2) + GroupsAssert groupsAssert = GroupsAssert.assertThat(groups); + groupsAssert.hasTotalGroups(4); + groupsAssert.assertGroup(textKey("GENDER", "MALE"), numericKey("AGE", "24"), textKey("BUILDING", "West")).hasCount(1); + groupsAssert.assertGroup(textKey("GENDER", "MALE"), numericKey("AGE", "24"), textKey("BUILDING", "East")).hasCount(1); + groupsAssert.assertGroup(textKey("GENDER", "FEMALE"), numericKey("AGE", "20"), textKey("BUILDING", "West")).hasCount(2); + groupsAssert.assertGroup(textKey("GENDER", "FEMALE"), numericKey("AGE", "20"), textKey("BUILDING", "East")).hasCount(2); + } + + @Test + public void testGroupingFieldsWithMatchingGroupsAndInstancesAndMultipleIntersectionalityPoints() { + givenGroupFields("AGE", "GENDER", "BUILDING", "RECORD_ID", "RECORD_TITLE"); + + givenDocumentEntry(DocumentEntry.of("AGE.FOO.1").withNumberType("24")); + givenDocumentEntry(DocumentEntry.of("AGE.FOO.2").withNumberType("20")); + givenDocumentEntry(DocumentEntry.of("AGE.FOO.3").withNumberType("20")); + + // Direct match to AGE.FOO.1. + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.1").withLcNoDiacritics("MALE")); + // Direct match to AGE.FOO.2. + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.2").withLcNoDiacritics("FEMALE")); + // Direct match to AGE.FOO.3. + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.3").withLcNoDiacritics("FEMALE")); + // No direct match with an AGE record, should be ignored since we have a direct match for a GENDER entry elsewhere. + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.4").withLcNoDiacritics("FEMALE")); + + givenDocumentEntry(DocumentEntry.of("RECORD_ID.HAT.1").withNumberType("123")); + givenDocumentEntry(DocumentEntry.of("RECORD_ID.HAT.2").withNumberType("456")); + + // Direct match to RECORD_ID.HAT.1. + givenDocumentEntry(DocumentEntry.of("RECORD_TITLE.HAT.1").withLcNoDiacritics("Manual")); + // Direct match to RECORD_ID.HAT.2. 
+ givenDocumentEntry(DocumentEntry.of("RECORD_TITLE.HAT.2").withLcNoDiacritics("Summary")); + + // No direct match, but we should have a cartesian product with the AGE-GENDER and RECORD_ID-RECORD_TITLE direct matches. + givenDocumentEntry(DocumentEntry.of("BUILDING.BAR.1").withLcNoDiacritics("West")); + // No direct match, but we should have a cartesian product with the AGE-GENDER and RECORD_ID-RECORD_TITLE direct matches. + givenDocumentEntry(DocumentEntry.of("BUILDING.BAR.2").withLcNoDiacritics("East")); + + executeGrouping(); + + // We should end up with the following groupings: + // 24-MALE-123-Manual-West (Count of 1) + // 24-MALE-456-Summary-West (Count of 1) + // 24-MALE-123-Manual-East (Count of 1) + // 24-MALE-456-Summary-East (Count of 1) + // 20-FEMALE-123-Manual-West (Count of 2) + // 20-FEMALE-456-Summary-West (Count of 2) + // 20-FEMALE-123-Manual-East (Count of 2) + // 20-FEMALE-456-Summary-East (Count of 2) + + GroupsAssert groupsAssert = GroupsAssert.assertThat(groups); + groupsAssert.hasTotalGroups(8); + + // @formatter:off + groupsAssert.assertGroup(textKey("GENDER", "MALE"), + numericKey("AGE", "24"), + textKey("BUILDING", "West"), + numericKey("RECORD_ID", "123"), + textKey("RECORD_TITLE", "Manual")).hasCount(1); + + groupsAssert.assertGroup(textKey("GENDER", "MALE"), + numericKey("AGE", "24"), + textKey("BUILDING", "West"), + numericKey("RECORD_ID", "456"), + textKey("RECORD_TITLE", "Summary")).hasCount(1); + + groupsAssert.assertGroup(textKey("GENDER", "MALE"), + numericKey("AGE", "24"), + textKey("BUILDING", "East"), + numericKey("RECORD_ID", "123"), + textKey("RECORD_TITLE", "Manual")).hasCount(1); + + groupsAssert.assertGroup(textKey("GENDER", "MALE"), + numericKey("AGE", "24"), + textKey("BUILDING", "East"), + numericKey("RECORD_ID", "456"), + textKey("RECORD_TITLE", "Summary")).hasCount(1); + + groupsAssert.assertGroup(textKey("GENDER", "FEMALE"), + numericKey("AGE", "20"), + textKey("BUILDING", "West"), + numericKey("RECORD_ID", "123"), + textKey("RECORD_TITLE", "Manual")).hasCount(2); + + groupsAssert.assertGroup(textKey("GENDER", "FEMALE"), + numericKey("AGE", "20"), + textKey("BUILDING", "West"), + numericKey("RECORD_ID", "456"), + textKey("RECORD_TITLE", "Summary")).hasCount(2); + + groupsAssert.assertGroup(textKey("GENDER", "FEMALE"), + numericKey("AGE", "20"), + textKey("BUILDING", "East"), + numericKey("RECORD_ID", "123"), + textKey("RECORD_TITLE", "Manual")).hasCount(2); + + groupsAssert.assertGroup(textKey("GENDER", "FEMALE"), + numericKey("AGE", "20"), + textKey("BUILDING", "East"), + numericKey("RECORD_ID", "456"), + textKey("RECORD_TITLE", "Summary")).hasCount(2); + // @formatter:on + } + + @Test + public void testGroupingBySingleFieldWithInstanceOnly() { + givenGroupFields("GENDER"); + + givenDocumentEntry(DocumentEntry.of("GENDER.1").withLcNoDiacritics("MALE").withLcNoDiacritics("FEMALE")); + givenDocumentEntry(DocumentEntry.of("GENDER.2").withLcNoDiacritics("MALE").withLcNoDiacritics("FEMALE")); + givenDocumentEntry(DocumentEntry.of("GENDER.3").withLcNoDiacritics("FEMALE")); + + executeGrouping(); + + // We should have the following groupings: + // MALE (Count of 2) + // FEMALE (Count of 3) + GroupsAssert groupsAssert = GroupsAssert.assertThat(groups); + groupsAssert.hasTotalGroups(2); + groupsAssert.assertGroup(textKey("GENDER", "MALE")).hasCount(2); + groupsAssert.assertGroup(textKey("GENDER", "FEMALE")).hasCount(3); + } + + @Test + public void testGroupingMultipleFieldsWithInstanceOnly() { + givenGroupFields("BUILDING", "AGE"); + + 
givenDocumentEntry(DocumentEntry.of("BUILDING.1").withLcNoDiacritics("West").withLcNoDiacritics("East")); + givenDocumentEntry(DocumentEntry.of("BUILDING.2").withLcNoDiacritics("West")); + + // No direct match, we should have a cartesian product with each BUILDING value. + givenDocumentEntry(DocumentEntry.of("AGE.1").withNumberType("20")); + // No direct match, we should have a cartesian product with each BUILDING value. + givenDocumentEntry(DocumentEntry.of("AGE.2").withNumberType("24")); + + executeGrouping(); + + // We should have the following groupings: + // West-20 (Count of 2) + // West-24 (Count of 2) + // East-20 (Count of 1) + // East-24 (Count of 1) + GroupsAssert groupsAssert = GroupsAssert.assertThat(groups); + groupsAssert.hasTotalGroups(4); + groupsAssert.assertGroup(textKey("BUILDING", "West"), numericKey("AGE", "20")).hasCount(2); + groupsAssert.assertGroup(textKey("BUILDING", "West"), numericKey("AGE", "24")).hasCount(2); + groupsAssert.assertGroup(textKey("BUILDING", "East"), numericKey("AGE", "20")).hasCount(1); + groupsAssert.assertGroup(textKey("BUILDING", "East"), numericKey("AGE", "24")).hasCount(1); + } + + @Test + public void testGroupingBySingleFieldWithoutInstance() { + givenGroupFields("GENDER"); + + givenDocumentEntry(DocumentEntry.of("GENDER").withLcNoDiacritics("MALE").withLcNoDiacritics("FEMALE")); + + executeGrouping(); + + // We should have the following groupings: + // MALE (Count of 1) + // FEMALE (Count of 1) + GroupsAssert groupsAssert = GroupsAssert.assertThat(groups); + groupsAssert.hasTotalGroups(2); + groupsAssert.assertGroup(textKey("GENDER", "MALE")).hasCount(1); + groupsAssert.assertGroup(textKey("GENDER", "FEMALE")).hasCount(1); + } + + @Test + public void testGroupingByMultipleFieldsWithoutInstance() { + givenGroupFields("GENDER", "BUILDING"); + + givenDocumentEntry(DocumentEntry.of("GENDER").withLcNoDiacritics("MALE").withLcNoDiacritics("FEMALE")); + givenDocumentEntry(DocumentEntry.of("BUILDING").withLcNoDiacritics("East").withLcNoDiacritics("West").withLcNoDiacritics("North")); + + executeGrouping(); + + // We should have the following groupings: + // MALE-East (Count of 1) + // MALE-West (Count of 1) + // MALE-North (Count of 1) + // FEMALE-East (Count of 1) + // FEMALE-West (Count of 1) + // FEMALE-North (Count of 1) + GroupsAssert groupsAssert = GroupsAssert.assertThat(groups); + groupsAssert.hasTotalGroups(6); + groupsAssert.assertGroup(textKey("GENDER", "MALE"), textKey("BUILDING", "West")).hasCount(1); + groupsAssert.assertGroup(textKey("GENDER", "MALE"), textKey("BUILDING", "East")).hasCount(1); + groupsAssert.assertGroup(textKey("GENDER", "MALE"), textKey("BUILDING", "North")).hasCount(1); + groupsAssert.assertGroup(textKey("GENDER", "FEMALE"), textKey("BUILDING", "West")).hasCount(1); + groupsAssert.assertGroup(textKey("GENDER", "FEMALE"), textKey("BUILDING", "East")).hasCount(1); + groupsAssert.assertGroup(textKey("GENDER", "FEMALE"), textKey("BUILDING", "North")).hasCount(1); + } + + @Test + public void testGroupingBySingleFieldAcrossMultipleDocuments() { + givenGroupFields("GENDER"); + + givenDocumentColumnVisibility(COLVIS_ALL); + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.1").withLcNoDiacritics("MALE", COLVIS_ALL)); + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.2").withLcNoDiacritics("MALE", COLVIS_ALL)); + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.3").withLcNoDiacritics("MALE", COLVIS_ALL)); + + executeGrouping(); + + resetDocument(); + givenDocumentColumnVisibility(COLVIS_E); + 
givenDocumentEntry(DocumentEntry.of("GENDER.BAR.1").withLcNoDiacritics("FEMALE", COLVIS_I)); + givenDocumentEntry(DocumentEntry.of("GENDER.BAR.2").withLcNoDiacritics("MALE", COLVIS_I)); + givenDocumentEntry(DocumentEntry.of("GENDER.HAT.1").withLcNoDiacritics("FEMALE", COLVIS_I)); + + executeGrouping(); + + resetDocument(); + givenDocumentColumnVisibility(COLVIS_I); + givenDocumentEntry(DocumentEntry.of("GENDER.TIN.1").withLcNoDiacritics("FEMALE", COLVIS_E)); + + executeGrouping(); + + // We should expect the following groups: + // MALE (Count of 4) + // FEMALE (Count of 3) + GroupsAssert groupsAssert = GroupsAssert.assertThat(groups); + groupsAssert.hasTotalGroups(2); + + groupsAssert.assertGroup(textKey("GENDER", "MALE")).hasCount(4).hasDocumentVisibilities(COLVIS_ALL, COLVIS_I) + .hasVisibilitiesForKey(textKey("GENDER", "MALE"), COLVIS_ALL, COLVIS_I); + groupsAssert.assertGroup(textKey("GENDER", "FEMALE")).hasCount(3).hasDocumentVisibilities(COLVIS_I, COLVIS_E) + .hasVisibilitiesForKey(textKey("GENDER", "FEMALE"), COLVIS_I, COLVIS_E); + } + + @Test + public void testGroupingByMultipleFieldsWithDifferentFormatsAcrossMultipleDocuments() { + // @formatter:off + givenGroupFields("GENDER", "BUILDING"); + + givenDocumentColumnVisibility(COLVIS_I); + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.1").withLcNoDiacritics("MALE", COLVIS_ALL)); + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.2").withLcNoDiacritics("FEMALE", COLVIS_ALL)); + givenDocumentEntry(DocumentEntry.of("BUILDING.1").withLcNoDiacritics("West", COLVIS_E).withLcNoDiacritics("East", COLVIS_I)); + + executeGrouping(); + + // This document contains only BUILDING entries. Because of this, we should see a single "North" and "South" grouping. + resetDocument(); + givenDocumentColumnVisibility(COLVIS_ALL); + givenDocumentEntry(DocumentEntry.of("BUILDING.1").withLcNoDiacritics("North") + .withLcNoDiacritics("South")); + + executeGrouping(); + + resetDocument(); + givenDocumentColumnVisibility(COLVIS_E); + givenDocumentEntry(DocumentEntry.of("GENDER.TIN.1").withLcNoDiacritics("MALE", COLVIS_ALL)); + givenDocumentEntry(DocumentEntry.of("GENDER.TIN.2").withLcNoDiacritics("MALE", COLVIS_ALL)); + givenDocumentEntry(DocumentEntry.of("BUILDING.1").withLcNoDiacritics("Center", COLVIS_ALL)); + + executeGrouping(); + + // @formatter:on + + // We should expect the following groups: + // MALE-West (Count of 1) + // MALE-East (Count of 1) + // FEMALE-West (Count of 1) + // FEMALE-East (Count of 1) + // MALE-Center (Count of 2) + // North (Count of 1) + // South (Count of 1) + + GroupsAssert groupsAssert = GroupsAssert.assertThat(groups); + groupsAssert.hasTotalGroups(7); + + groupsAssert.assertGroup(textKey("GENDER", "MALE"), textKey("BUILDING", "West")).hasCount(1).hasDocumentVisibilities(COLVIS_ALL_E_I) + .hasVisibilitiesForKey(textKey("GENDER", "MALE"), COLVIS_ALL).hasVisibilitiesForKey(textKey("BUILDING", "West"), COLVIS_E); + groupsAssert.assertGroup(textKey("GENDER", "MALE"), textKey("BUILDING", "East")).hasCount(1).hasDocumentVisibilities(COLVIS_ALL_E_I) + .hasVisibilitiesForKey(textKey("GENDER", "MALE"), COLVIS_ALL).hasVisibilitiesForKey(textKey("BUILDING", "East"), COLVIS_I); + groupsAssert.assertGroup(textKey("GENDER", "FEMALE"), textKey("BUILDING", "West")).hasCount(1).hasDocumentVisibilities(COLVIS_ALL_E_I) + .hasVisibilitiesForKey(textKey("GENDER", "FEMALE"), COLVIS_ALL).hasVisibilitiesForKey(textKey("BUILDING", "West"), COLVIS_E); + groupsAssert.assertGroup(textKey("GENDER", "FEMALE"), textKey("BUILDING", 
"East")).hasCount(1).hasDocumentVisibilities(COLVIS_ALL_E_I) + .hasVisibilitiesForKey(textKey("GENDER", "FEMALE"), COLVIS_ALL).hasVisibilitiesForKey(textKey("BUILDING", "East"), COLVIS_I); + groupsAssert.assertGroup(textKey("GENDER", "MALE"), textKey("BUILDING", "Center")).hasCount(2).hasDocumentVisibilities(COLVIS_ALL) + .hasVisibilitiesForKey(textKey("GENDER", "MALE"), COLVIS_ALL).hasVisibilitiesForKey(textKey("BUILDING", "Center"), COLVIS_ALL); + groupsAssert.assertGroup(textKey("BUILDING", "North")).hasCount(1).hasDocumentVisibilities(COLVIS_ALL); + groupsAssert.assertGroup(textKey("BUILDING", "South")).hasCount(1).hasDocumentVisibilities(COLVIS_ALL); + } + + @Test + public void testAggregatingFieldWithGroupingContextAndInstanceWithDirectMatches() { + givenGroupFields("GENDER"); + givenSumFields("AGE"); + givenMaxFields("AGE"); + givenMinFields("AGE"); + givenCountFields("AGE"); + givenAverageFields("AGE"); + + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.1").withLcNoDiacritics("MALE")); + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.2").withLcNoDiacritics("FEMALE")); + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.3").withLcNoDiacritics("MALE")); + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.4").withLcNoDiacritics("FEMALE")); + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.5").withLcNoDiacritics("FEMALE")); + + // Should aggregate to any grouping that contains GENDER.FOO.1. + givenDocumentEntry(DocumentEntry.of("AGE.FOO.1").withNumberType("20")); + // Should aggregate to any grouping that contains GENDER.FOO.2. + givenDocumentEntry(DocumentEntry.of("AGE.FOO.2").withNumberType("5")); + // Should aggregate to any grouping that contains GENDER.FOO.3. + givenDocumentEntry(DocumentEntry.of("AGE.FOO.3").withNumberType("15")); + // Should aggregate to any grouping that contains GENDER.FOO.4. + givenDocumentEntry(DocumentEntry.of("AGE.FOO.4").withNumberType("30")); + // Should aggregate to any grouping that contains GENDER.FOO.5. + givenDocumentEntry(DocumentEntry.of("AGE.FOO.5").withNumberType("50")); + // Should not aggregate to any groupings since it does not have a direct match, but other direct matches exist for AGE. 
+ givenDocumentEntry(DocumentEntry.of("AGE.FOO.6").withNumberType("100")); + + executeGrouping(); + + // We should expect the following groups: + // MALE (Count of 2) + // FEMALE (Count of 3) + GroupsAssert groupsAssert = GroupsAssert.assertThat(groups); + groupsAssert.hasTotalGroups(2); + + // @formatter:off + groupsAssert.assertGroup(textKey("GENDER", "MALE")).hasCount(2) + .hasAggregatedSum("AGE", new BigDecimal("35")) + .hasAggregatedCount("AGE", 2L) + .hasAggregatedAverage("AGE", new BigDecimal("17.5")) + .hasAggregatedMax("AGE", new NumberType("20")) + .hasAggregatedMin("AGE", new NumberType("15")); + + groupsAssert.assertGroup(textKey("GENDER", "FEMALE")).hasCount(3) + .hasAggregatedSum("AGE", new BigDecimal("85")) + .hasAggregatedCount("AGE", 3L) + .hasAggregatedAverage("AGE", new BigDecimal("28.33333333")) + .hasAggregatedMax("AGE", new NumberType("50")) + .hasAggregatedMin("AGE", new NumberType("5")); + // @formatter:on + } + + @Test + public void testAggregatingFieldWithGroupingContextAndInstanceWithNoDirectMatches() { + givenGroupFields("GENDER"); + givenSumFields("AGE"); + givenMaxFields("AGE"); + givenMinFields("AGE"); + givenCountFields("AGE"); + givenAverageFields("AGE"); + + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.1").withLcNoDiacritics("MALE")); + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.2").withLcNoDiacritics("FEMALE")); + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.3").withLcNoDiacritics("MALE")); + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.4").withLcNoDiacritics("FEMALE")); + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.5").withLcNoDiacritics("FEMALE")); + + // Every AGE value should aggregate to every GENDER grouping since no direct matches exist between an AGE and GENDER entry. + givenDocumentEntry(DocumentEntry.of("AGE.BAR.1").withNumberType("20")); + givenDocumentEntry(DocumentEntry.of("AGE.BAR.2").withNumberType("5")); + givenDocumentEntry(DocumentEntry.of("AGE.BAR.3").withNumberType("15")); + givenDocumentEntry(DocumentEntry.of("AGE.HAT.1").withNumberType("30")); + givenDocumentEntry(DocumentEntry.of("AGE.HAT.2").withNumberType("50")); + + executeGrouping(); + + // We should expect the following groups: + // MALE (Count of 2) + // FEMALE (Count of 3) + // We should also expect the aggregation results to be the same for each group. 
+ GroupsAssert groupsAssert = GroupsAssert.assertThat(groups); + groupsAssert.hasTotalGroups(2); + + // @formatter:off + groupsAssert.assertGroup(textKey("GENDER", "MALE")).hasCount(2) + .hasAggregatedSum("AGE", new BigDecimal("120")) + .hasAggregatedCount("AGE", 5L) + .hasAggregatedAverage("AGE", new BigDecimal("24")) + .hasAggregatedMax("AGE", new NumberType("50")) + .hasAggregatedMin("AGE", new NumberType("5")); + + groupsAssert.assertGroup(textKey("GENDER", "FEMALE")).hasCount(3) + .hasAggregatedSum("AGE", new BigDecimal("120")) + .hasAggregatedCount("AGE", 5L) + .hasAggregatedAverage("AGE", new BigDecimal("24")) + .hasAggregatedMax("AGE", new NumberType("50")) + .hasAggregatedMin("AGE", new NumberType("5")); + // @formatter:on + } + + @Test + public void testAggregatingFieldWithInstanceOnly() { + givenGroupFields("GENDER"); + givenSumFields("AGE"); + givenMaxFields("AGE"); + givenMinFields("AGE"); + givenCountFields("AGE"); + givenAverageFields("AGE"); + + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.1").withLcNoDiacritics("MALE")); + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.2").withLcNoDiacritics("FEMALE")); + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.3").withLcNoDiacritics("MALE")); + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.4").withLcNoDiacritics("FEMALE")); + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.5").withLcNoDiacritics("FEMALE")); + + // Every AGE value should aggregate to every GENDER grouping since no direct matches exist between an AGE and GENDER entry. + givenDocumentEntry(DocumentEntry.of("AGE.1").withNumberType("20").withNumberType("5").withNumberType("15")); + givenDocumentEntry(DocumentEntry.of("AGE.2").withNumberType("30").withNumberType("50")); + + executeGrouping(); + + // We should expect the following groups: + // MALE (Count of 2) + // FEMALE (Count of 3) + // We should also expect the aggregation results to be the same for each group. + GroupsAssert groupsAssert = GroupsAssert.assertThat(groups); + groupsAssert.hasTotalGroups(2); + + // @formatter:off + groupsAssert.assertGroup(textKey("GENDER", "MALE")).hasCount(2) + .hasAggregatedSum("AGE", new BigDecimal("120")) + .hasAggregatedCount("AGE", 5L) + .hasAggregatedAverage("AGE", new BigDecimal("24")) + .hasAggregatedMax("AGE", new NumberType("50")) + .hasAggregatedMin("AGE", new NumberType("5")); + + groupsAssert.assertGroup(textKey("GENDER", "FEMALE")).hasCount(3) + .hasAggregatedSum("AGE", new BigDecimal("120")) + .hasAggregatedCount("AGE", 5L) + .hasAggregatedAverage("AGE", new BigDecimal("24")) + .hasAggregatedMax("AGE", new NumberType("50")) + .hasAggregatedMin("AGE", new NumberType("5")); + // @formatter:on + } + + @Test + public void testAggregatingFieldWithoutInstance() { + givenGroupFields("GENDER"); + givenSumFields("AGE"); + givenMaxFields("AGE"); + givenMinFields("AGE"); + givenCountFields("AGE"); + givenAverageFields("AGE"); + + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.1").withLcNoDiacritics("MALE")); + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.2").withLcNoDiacritics("FEMALE")); + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.3").withLcNoDiacritics("MALE")); + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.4").withLcNoDiacritics("FEMALE")); + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.5").withLcNoDiacritics("FEMALE")); + + // Every AGE value should aggregate to every GENDER grouping since no direct matches exist between an AGE and GENDER entry. 
+ givenDocumentEntry(DocumentEntry.of("AGE").withNumberType("20").withNumberType("5").withNumberType("15").withNumberType("30").withNumberType("50")); + + executeGrouping(); + + // We should expect the following groups: + // MALE (Count of 2) + // FEMALE (Count of 3) + // We should also expect the aggregation results to be the same for each group. + GroupsAssert groupsAssert = GroupsAssert.assertThat(groups); + groupsAssert.hasTotalGroups(2); + + // @formatter:off + groupsAssert.assertGroup(textKey("GENDER", "MALE")).hasCount(2) + .hasAggregatedSum("AGE", new BigDecimal("120")) + .hasAggregatedCount("AGE", 5L) + .hasAggregatedAverage("AGE", new BigDecimal("24")) + .hasAggregatedMax("AGE", new NumberType("50")) + .hasAggregatedMin("AGE", new NumberType("5")); + + groupsAssert.assertGroup(textKey("GENDER", "FEMALE")).hasCount(3) + .hasAggregatedSum("AGE", new BigDecimal("120")) + .hasAggregatedCount("AGE", 5L) + .hasAggregatedAverage("AGE", new BigDecimal("24")) + .hasAggregatedMax("AGE", new NumberType("50")) + .hasAggregatedMin("AGE", new NumberType("5")); + // @formatter:on + } + + @Test + public void testAggregatingFieldsWithMixedFormats() { + givenGroupFields("GENDER", "AGE"); + givenSumFields("HEIGHT"); + givenMaxFields("HEIGHT", "BUILDING"); + givenMinFields("BUILDING"); + givenCountFields("HEIGHT", "BUILDING"); + givenAverageFields("HEIGHT"); + + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.1").withLcNoDiacritics("MALE")); + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.2").withLcNoDiacritics("FEMALE")); + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.3").withLcNoDiacritics("MALE")); + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.4").withLcNoDiacritics("FEMALE")); + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.5").withLcNoDiacritics("FEMALE")); + + givenDocumentEntry(DocumentEntry.of("AGE.FOO.1").withNumberType("20")); + givenDocumentEntry(DocumentEntry.of("AGE.FOO.2").withNumberType("5")); + givenDocumentEntry(DocumentEntry.of("AGE.FOO.3").withNumberType("20")); + givenDocumentEntry(DocumentEntry.of("AGE.FOO.4").withNumberType("30")); + givenDocumentEntry(DocumentEntry.of("AGE.FOO.5").withNumberType("5")); + + givenDocumentEntry(DocumentEntry.of("HEIGHT.FOO.1").withNumberType("50")); + givenDocumentEntry(DocumentEntry.of("HEIGHT.FOO.2").withNumberType("65")); + givenDocumentEntry(DocumentEntry.of("HEIGHT.FOO.3").withNumberType("60")); + givenDocumentEntry(DocumentEntry.of("HEIGHT.FOO.4").withNumberType("55")); + givenDocumentEntry(DocumentEntry.of("HEIGHT.FOO.5").withNumberType("48")); + + givenDocumentEntry(DocumentEntry.of("BUILDING.1").withLcNoDiacritics("West")); + givenDocumentEntry(DocumentEntry.of("BUILDING.2").withLcNoDiacritics("North").withLcNoDiacritics("East")); + + executeGrouping(); + + GroupsAssert groupsAssert = GroupsAssert.assertThat(groups); + groupsAssert.hasTotalGroups(3); + + // @formatter:off + groupsAssert.assertGroup(textKey("GENDER", "FEMALE"), numericKey("AGE", "30")).hasCount(1) + .hasAggregatedMax("BUILDING", new LcNoDiacriticsType("West")) + .hasAggregatedMin("BUILDING", new LcNoDiacriticsType("East")) + .hasAggregatedCount("BUILDING", 3L) + .hasAggregatedMax("HEIGHT", new NumberType("55")) + .hasAggregatedCount("HEIGHT", 1L) + .hasAggregatedSum("HEIGHT", new BigDecimal("55")) + .hasAggregatedAverage("HEIGHT", new BigDecimal("55")); + + groupsAssert.assertGroup(textKey("GENDER", "FEMALE"), numericKey("AGE", "5")).hasCount(2) + .hasAggregatedMax("BUILDING", new LcNoDiacriticsType("West")) + .hasAggregatedMin("BUILDING", new 
LcNoDiacriticsType("East")) + .hasAggregatedCount("BUILDING", 3L) + .hasAggregatedMax("HEIGHT", new NumberType("65")) + .hasAggregatedCount("HEIGHT", 2L) + .hasAggregatedSum("HEIGHT", new BigDecimal("113")) + .hasAggregatedAverage("HEIGHT", new BigDecimal("56.5")); + + groupsAssert.assertGroup(textKey("GENDER", "MALE"), numericKey("AGE", "20")).hasCount(2) + .hasAggregatedMax("BUILDING", new LcNoDiacriticsType("West")) + .hasAggregatedMin("BUILDING", new LcNoDiacriticsType("East")) + .hasAggregatedCount("BUILDING", 3L) + .hasAggregatedMax("HEIGHT", new NumberType("60")) + .hasAggregatedCount("HEIGHT", 2L) + .hasAggregatedSum("HEIGHT", new BigDecimal("110")) + .hasAggregatedAverage("HEIGHT", new BigDecimal("55")); + // @formatter:on + } + + @Test + public void testAggregationAcrossMultipleDocuments() { + givenGroupFields("GENDER", "AGE"); + givenSumFields("HEIGHT"); + + // We should see groups being counted and aggregation for HEIGHT occurring. + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.1").withLcNoDiacritics("MALE")); + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.2").withLcNoDiacritics("FEMALE")); + givenDocumentEntry(DocumentEntry.of("AGE.FOO.1").withNumberType("20")); + givenDocumentEntry(DocumentEntry.of("AGE.FOO.2").withNumberType("15")); + // Should aggregate to FOO.1 grouping. + givenDocumentEntry(DocumentEntry.of("HEIGHT.FOO.1").withNumberType("5")); + // Should aggregate to FOO.2 grouping. + givenDocumentEntry(DocumentEntry.of("HEIGHT.FOO.2").withNumberType("5")); + // Should not aggregate to anything since there is no direct match for this HEIGHT entry but there are direct matches for other HEIGHT entries. + givenDocumentEntry(DocumentEntry.of("HEIGHT.FOO.3").withNumberType("5")); + + executeGrouping(); + + // We should see groups being counted, but no aggregation should occur since there are no HEIGHT entries. + resetDocument(); + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.1").withLcNoDiacritics("MALE")); + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.2").withLcNoDiacritics("FEMALE")); + givenDocumentEntry(DocumentEntry.of("AGE.FOO.1").withNumberType("20")); + givenDocumentEntry(DocumentEntry.of("AGE.FOO.2").withNumberType("15")); + + executeGrouping(); + + // We should see single value groupings for "MALE" and "FEMALE" being count, with aggregation for HEIGHT occurring. + resetDocument(); + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.1").withLcNoDiacritics("MALE")); + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.2").withLcNoDiacritics("FEMALE")); + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.3").withLcNoDiacritics("FEMALE")); + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.4").withLcNoDiacritics("FEMALE")); + givenDocumentEntry(DocumentEntry.of("HEIGHT.FOO.1").withNumberType("5")); + givenDocumentEntry(DocumentEntry.of("HEIGHT.FOO.2").withNumberType("5")); + givenDocumentEntry(DocumentEntry.of("HEIGHT.FOO.3").withNumberType("5")); + + executeGrouping(); + + // We should see groups being counted and aggregation for HEIGHT occurring. + resetDocument(); + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.1").withLcNoDiacritics("MALE")); + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.2").withLcNoDiacritics("MALE")); + givenDocumentEntry(DocumentEntry.of("AGE.FOO.1").withNumberType("20")); + givenDocumentEntry(DocumentEntry.of("AGE.FOO.2").withNumberType("15")); + // Should aggregate to FOO.1 grouping. + givenDocumentEntry(DocumentEntry.of("HEIGHT.FOO.1").withNumberType("5")); + // Should aggregate to FOO.2 grouping. 
+ givenDocumentEntry(DocumentEntry.of("HEIGHT.FOO.2").withNumberType("5")); + + executeGrouping(); + + // We should see the HEIGHT aggregated towards an empty grouping. + resetDocument(); + givenDocumentEntry(DocumentEntry.of("ADDRESS").withLcNoDiacritics("Los Angeles")); + givenDocumentEntry(DocumentEntry.of("HEIGHT.FOO.1").withNumberType("5")); + givenDocumentEntry(DocumentEntry.of("HEIGHT.FOO.2").withNumberType("10")); + + executeGrouping(); + + // We should see the empty grouping count increase by 1. + resetDocument(); + givenDocumentEntry(DocumentEntry.of("ADDRESS").withLcNoDiacritics("New York City")); + + executeGrouping(); + + // We should see the empty grouping count increase by 1 and the HEIGHT entries aggregated towards it. + resetDocument(); + givenDocumentEntry(DocumentEntry.of("ADDRESS").withLcNoDiacritics("San Diego")); + givenDocumentEntry(DocumentEntry.of("HEIGHT.FOO.1").withNumberType("1")); + givenDocumentEntry(DocumentEntry.of("HEIGHT.FOO.2").withNumberType("1")); + givenDocumentEntry(DocumentEntry.of("HEIGHT.FOO.3").withNumberType("1")); + + executeGrouping(); + + GroupsAssert groupsAssert = GroupsAssert.assertThat(groups); + groupsAssert.hasTotalGroups(6); + + // @formatter:off + groupsAssert.assertGroup(textKey("GENDER", "MALE")).hasCount(1) + .hasAggregatedSum("HEIGHT", new BigDecimal("5")); + + groupsAssert.assertGroup(textKey("GENDER", "FEMALE")).hasCount(3) + .hasAggregatedSum("HEIGHT", new BigDecimal("10")); + + groupsAssert.assertGroup(textKey("GENDER", "MALE"), numericKey("AGE", "15")).hasCount(1) + .hasAggregatedSum("HEIGHT", new BigDecimal("5")); + + groupsAssert.assertGroup(textKey("GENDER", "FEMALE"), numericKey("AGE", "15")).hasCount(2) + .hasAggregatedSum("HEIGHT", new BigDecimal("5")); + + groupsAssert.assertGroup(textKey("GENDER", "MALE"), numericKey("AGE", "20")).hasCount(3) + .hasAggregatedSum("HEIGHT", new BigDecimal("10")); + + groupsAssert.assertGroup(Grouping.emptyGrouping()).hasCount(3) + .hasAggregatedSum("HEIGHT", new BigDecimal("18")); + // @formatter:on + } + + @Test + public void testAggregatingFieldsWithMixedFormatsWithModelMapping() { + givenGroupFields("GEN", "AG"); + givenSumFields("PEAK"); + givenMaxFields("PEAK", "LOC"); + givenMinFields("LOC"); + givenCountFields("PEAK", "LOC"); + givenAverageFields("PEAK"); + + givenRemappedFields(); + + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.1").withLcNoDiacritics("MALE")); + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.2").withLcNoDiacritics("FEMALE")); + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.3").withLcNoDiacritics("MALE")); + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.4").withLcNoDiacritics("FEMALE")); + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.5").withLcNoDiacritics("FEMALE")); + + givenDocumentEntry(DocumentEntry.of("ETA.FOO.1").withNumberType("20")); + givenDocumentEntry(DocumentEntry.of("ETA.FOO.2").withNumberType("5")); + givenDocumentEntry(DocumentEntry.of("ETA.FOO.3").withNumberType("20")); + givenDocumentEntry(DocumentEntry.of("ETA.FOO.4").withNumberType("30")); + givenDocumentEntry(DocumentEntry.of("ETA.FOO.5").withNumberType("5")); + + givenDocumentEntry(DocumentEntry.of("HEIGHT.FOO.1").withNumberType("50")); + givenDocumentEntry(DocumentEntry.of("HEIGHT.FOO.2").withNumberType("65")); + givenDocumentEntry(DocumentEntry.of("HEIGHT.FOO.3").withNumberType("60")); + givenDocumentEntry(DocumentEntry.of("HEIGHT.FOO.4").withNumberType("55")); + givenDocumentEntry(DocumentEntry.of("HEIGHT.FOO.5").withNumberType("48")); + + 
givenDocumentEntry(DocumentEntry.of("LOCATION.1").withLcNoDiacritics("West")); + givenDocumentEntry(DocumentEntry.of("LOCATION.2").withLcNoDiacritics("North").withLcNoDiacritics("East")); + + executeGrouping(); + + // We should see each field mapped to their root model name. + GroupsAssert groupsAssert = GroupsAssert.assertThat(groups); + groupsAssert.hasTotalGroups(3); + + // @formatter:off + groupsAssert.assertGroup(textKey("GEN", "FEMALE"), numericKey("AG", "30")).hasCount(1) + .hasAggregatedMax("LOC", new LcNoDiacriticsType("West")) + .hasAggregatedMin("LOC", new LcNoDiacriticsType("East")) + .hasAggregatedCount("LOC", 3L) + .hasAggregatedMax("PEAK", new NumberType("55")) + .hasAggregatedCount("PEAK", 1L) + .hasAggregatedSum("PEAK", new BigDecimal("55")) + .hasAggregatedAverage("PEAK", new BigDecimal("55")); + + groupsAssert.assertGroup(textKey("GEN", "FEMALE"), numericKey("AG", "5")).hasCount(2) + .hasAggregatedMax("LOC", new LcNoDiacriticsType("West")) + .hasAggregatedMin("LOC", new LcNoDiacriticsType("East")) + .hasAggregatedCount("LOC", 3L) + .hasAggregatedMax("PEAK", new NumberType("65")) + .hasAggregatedCount("PEAK", 2L) + .hasAggregatedSum("PEAK", new BigDecimal("113")) + .hasAggregatedAverage("PEAK", new BigDecimal("56.5")); + + groupsAssert.assertGroup(textKey("GEN", "MALE"), numericKey("AG", "20")).hasCount(2) + .hasAggregatedMax("LOC", new LcNoDiacriticsType("West")) + .hasAggregatedMin("LOC", new LcNoDiacriticsType("East")) + .hasAggregatedCount("LOC", 3L) + .hasAggregatedMax("PEAK", new NumberType("60")) + .hasAggregatedCount("PEAK", 2L) + .hasAggregatedSum("PEAK", new BigDecimal("110")) + .hasAggregatedAverage("PEAK", new BigDecimal("55")); + // @formatter:on + } + + @Test + public void testAggregationAcrossMultipleDocumentsWithModelMapping() { + givenGroupFields("GEN", "AG"); + givenSumFields("PEAK"); + + givenRemappedFields(); + + // We should see groups being counted and aggregation for HEIGHT occurring. + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.1").withLcNoDiacritics("MALE")); + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.2").withLcNoDiacritics("FEMALE")); + givenDocumentEntry(DocumentEntry.of("AGE.FOO.1").withNumberType("20")); + givenDocumentEntry(DocumentEntry.of("AGE.FOO.2").withNumberType("15")); + // Should aggregate to FOO.1 grouping. + givenDocumentEntry(DocumentEntry.of("HEIGHT.FOO.1").withNumberType("5")); + // Should aggregate to FOO.2 grouping. + givenDocumentEntry(DocumentEntry.of("HEIGHT.FOO.2").withNumberType("5")); + // Should not aggregate to anything since there is no direct match for this HEIGHT entry but there are direct matches for other HEIGHT entries. + givenDocumentEntry(DocumentEntry.of("HEIGHT.FOO.3").withNumberType("5")); + + executeGrouping(); + + // We should see the groups "MALE" and "FEMALE" with no aggregation. + resetDocument(); + givenDocumentEntry(DocumentEntry.of("GENERE.FOO.1").withLcNoDiacritics("MALE")); + givenDocumentEntry(DocumentEntry.of("GENERE.FOO.2").withLcNoDiacritics("FEMALE")); + givenDocumentEntry(DocumentEntry.of("ETA.FOO.1").withNumberType("20")); + givenDocumentEntry(DocumentEntry.of("ETA.FOO.2").withNumberType("15")); + + executeGrouping(); + + // We should see the groups "MALE" and "FEMALE" with aggregation of the HEIGHT entries. 
+ resetDocument(); + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.1").withLcNoDiacritics("MALE")); + givenDocumentEntry(DocumentEntry.of("GENDER.FOO.2").withLcNoDiacritics("FEMALE")); + givenDocumentEntry(DocumentEntry.of("HEIGHT.FOO.1").withNumberType("5")); + givenDocumentEntry(DocumentEntry.of("HEIGHT.FOO.2").withNumberType("5")); + givenDocumentEntry(DocumentEntry.of("HEIGHT.FOO.3").withNumberType("5")); + + executeGrouping(); + + // We should see groups being counted and aggregation for HEIGHT occurring. + resetDocument(); + givenDocumentEntry(DocumentEntry.of("GEN.FOO.1").withLcNoDiacritics("MALE")); + givenDocumentEntry(DocumentEntry.of("GEN.FOO.2").withLcNoDiacritics("MALE")); + givenDocumentEntry(DocumentEntry.of("AG.FOO.1").withNumberType("20")); + givenDocumentEntry(DocumentEntry.of("AG.FOO.2").withNumberType("15")); + // Should aggregate to FOO.1 grouping. + givenDocumentEntry(DocumentEntry.of("PEAK.FOO.1").withNumberType("5")); + // Should aggregate to FOO.2 grouping. + givenDocumentEntry(DocumentEntry.of("PEAK.FOO.2").withNumberType("5")); + + executeGrouping(); + + GroupsAssert groupsAssert = GroupsAssert.assertThat(groups); + groupsAssert.hasTotalGroups(5); + + // We should see each field mapped to the root model mapping name, e.g. GENDER -> GEN, AGE -> AG, HEIGHT -> PEAK, etc. + // @formatter:off + groupsAssert.assertGroup(textKey("GEN", "MALE")).hasCount(1) + .hasAggregatedSum("PEAK", new BigDecimal("5")); + + groupsAssert.assertGroup(textKey("GEN", "FEMALE")).hasCount(1) + .hasAggregatedSum("PEAK", new BigDecimal("5")); + + groupsAssert.assertGroup(textKey("GEN", "MALE"), numericKey("AG", "15")).hasCount(1) + .hasAggregatedSum("PEAK", new BigDecimal("5")); + + groupsAssert.assertGroup(textKey("GEN", "FEMALE"), numericKey("AG", "15")).hasCount(2) + .hasAggregatedSum("PEAK", new BigDecimal("5")); + + groupsAssert.assertGroup(textKey("GEN", "MALE"), numericKey("AG", "20")).hasCount(3) + .hasAggregatedSum("PEAK", new BigDecimal("10")); + // @formatter:on + } + + @Test + public void testAggregationAcrossDocumentsWithNoGroups() { + givenGroupFields("GEN", "AG"); + givenSumFields("PEAK"); + + givenRemappedFields(); + + // We should see an 'empty' group and aggregation for HEIGHT occurring. + givenDocumentEntry(DocumentEntry.of("HEIGHT.FOO.1").withNumberType("5")); + givenDocumentEntry(DocumentEntry.of("HEIGHT.FOO.2").withNumberType("5")); + givenDocumentEntry(DocumentEntry.of("HEIGHT.FOO.3").withNumberType("5")); + + executeGrouping(); + + // We should see an 'empty' group with no aggregation. + resetDocument(); + givenDocumentEntry(DocumentEntry.of("ADDRESS").withLcNoDiacritics("Los Angeles").withLcNoDiacritics("San Diego").withLcNoDiacritics("Baltimore")); + + executeGrouping(); + + // We should see an 'empty' group and aggregation for HEIGHT occurring. + resetDocument(); + givenDocumentEntry(DocumentEntry.of("ADDRESS").withLcNoDiacritics("Denver")); + givenDocumentEntry(DocumentEntry.of("HEIGHT.FOO.1").withNumberType("5")); + givenDocumentEntry(DocumentEntry.of("HEIGHT.FOO.2").withNumberType("5")); + givenDocumentEntry(DocumentEntry.of("HEIGHT.FOO.3").withNumberType("5")); + + executeGrouping(); + + // We should see an 'empty' group and aggregation for HEIGHT occurring. 
+ resetDocument(); + givenDocumentEntry(DocumentEntry.of("ADDRESS").withLcNoDiacritics("Denver")); + givenDocumentEntry(DocumentEntry.of("PEAK.FOO.1").withNumberType("5")); + givenDocumentEntry(DocumentEntry.of("PEAK.FOO.2").withNumberType("5")); + + executeGrouping(); + + GroupsAssert groupsAssert = GroupsAssert.assertThat(groups); + groupsAssert.hasTotalGroups(1); + + // We should see each field mapped to the root model mapping name, e.g. GENDER -> GEN, AGE -> AG, HEIGHT -> PEAK, etc. + // @formatter:off + groupsAssert.assertGroup().hasCount(4).hasAggregatedSum("PEAK", new BigDecimal("40")); + // @formatter:on + } + + private void givenGroupFields(String... fields) { + groupFields.setGroupByFields(Sets.newHashSet(Arrays.asList(fields))); + } + + private void givenSumFields(String... fields) { + groupFields.setSumFields(Sets.newHashSet(Arrays.asList(fields))); + } + + private void givenCountFields(String... fields) { + groupFields.setCountFields(Sets.newHashSet(Arrays.asList(fields))); + } + + private void givenAverageFields(String... fields) { + groupFields.setAverageFields(Sets.newHashSet(Arrays.asList(fields))); + } + + private void givenMinFields(String... fields) { + groupFields.setMinFields(Sets.newHashSet(Arrays.asList(fields))); + } + + private void givenMaxFields(String... fields) { + groupFields.setMaxFields(Sets.newHashSet(Arrays.asList(fields))); + } + + private void givenRemappedFields() { + this.groupFields.remapFields(inverseReverseMap, reverseMap); + } + + private void resetDocument() { + this.document = new Document(); + } + + private void givenDocumentEntry(DocumentEntry builder) { + builder.addEntryTo(this.document); + } + + private void givenDocumentColumnVisibility(ColumnVisibility columnVisibility) { + this.document.setColumnVisibility(columnVisibility); + } + + private void executeGrouping() { + Map.Entry keyDocumentEntry = new AbstractMap.SimpleEntry<>(key, this.document); + DocumentGrouper.group(keyDocumentEntry, this.groupFields, this.groups); + } + + private GroupingAttribute numericKey(String key, String value) { + return createGroupingAttribute(key, new NumberType(value)); + } + + private GroupingAttribute textKey(String key, String value) { + return createGroupingAttribute(key, new LcNoDiacriticsType(value)); + } + + private GroupingAttribute createGroupingAttribute(String key, Type type) { + return new GroupingAttribute<>(type, new Key(key), true); + } + + private static class DocumentEntry { + private final String fieldName; + private final List> attributes = new ArrayList<>(); + + public static DocumentEntry of(String fieldName) { + return new DocumentEntry(fieldName); + } + + public DocumentEntry(String fieldName) { + this.fieldName = fieldName; + } + + public DocumentEntry withNumberType(String value) { + return withNumberType(value, COLVIS_ALL); + } + + public DocumentEntry withNumberType(String value, ColumnVisibility visibility) { + addTypedAttribute(new NumberType(value), visibility); + return this; + } + + public DocumentEntry withLcNoDiacritics(String value) { + return withLcNoDiacritics(value, COLVIS_ALL); + } + + public DocumentEntry withLcNoDiacritics(String value, ColumnVisibility visibility) { + addTypedAttribute(new LcNoDiacriticsType(value), visibility); + return this; + } + + private void addTypedAttribute(Type type, ColumnVisibility visibility) { + TypeAttribute attribute = new TypeAttribute<>(type, new Key("cf", "cq"), true); + attribute.setColumnVisibility(visibility); + this.attributes.add(attribute); + } + + public void 
addEntryTo(Document document) { + if (attributes.isEmpty()) { + throw new IllegalArgumentException("No attributes set for document entry"); + } else if (attributes.size() == 1) { + document.put(fieldName, this.attributes.get(0), true, false); + } else { + document.put(fieldName, new Attributes(this.attributes, true), true, false); + } + } + } +} diff --git a/warehouse/query-core/src/test/java/datawave/query/common/grouping/GroupFieldsTest.java b/warehouse/query-core/src/test/java/datawave/query/common/grouping/GroupFieldsTest.java new file mode 100644 index 0000000000..e6accddacc --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/query/common/grouping/GroupFieldsTest.java @@ -0,0 +1,247 @@ +package datawave.query.common.grouping; + +import static org.assertj.core.api.Assertions.assertThat; + +import java.util.HashMap; +import java.util.Map; + +import org.junit.BeforeClass; +import org.junit.Test; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.HashMultimap; +import com.google.common.collect.Multimap; +import com.google.common.collect.Sets; + +public class GroupFieldsTest { + + private static final ObjectMapper objectMapper = new ObjectMapper(); + private static final Multimap inverseReverseModel = HashMultimap.create(); + private static final Map reverseModel = new HashMap<>(); + + @BeforeClass + public static void beforeClass() { + inverseReverseModel.put("GEN", "GENERE"); + inverseReverseModel.put("GEN", "GENDER"); + inverseReverseModel.put("AG", "AGE"); + inverseReverseModel.put("NOME", "NAME"); + inverseReverseModel.put("ADDR", "ADDRESS"); + + reverseModel.put("GENERE", "GEN"); + reverseModel.put("GENDER", "GEN"); + reverseModel.put("AGE", "AG"); + reverseModel.put("NAME", "NOME"); + reverseModel.put("ADDRESS", "ADDR"); + } + + @Test + public void testEmptyGroupFieldsToString() { + GroupFields groupFields = new GroupFields(); + assertThat(groupFields.toString()).isEmpty(); + } + + @Test + public void testGroupFieldsToString() { + GroupFields groupFields = new GroupFields(); + groupFields.setGroupByFields(Sets.newHashSet("A", "1")); + groupFields.setSumFields(Sets.newHashSet("B", "2")); + groupFields.setCountFields(Sets.newHashSet("C", "3")); + groupFields.setAverageFields(Sets.newHashSet("D", "4")); + groupFields.setMinFields(Sets.newHashSet("E", "5")); + groupFields.setMaxFields(Sets.newHashSet("F", "6")); + + assertThat(groupFields.toString()).isEqualTo("GROUP(A,1)|SUM(B,2)|COUNT(C,3)|AVERAGE(D,4)|MIN(E,5)|MAX(F,6)"); + } + + @Test + public void testRemappedGroupFieldsToString() { + GroupFields groupFields = new GroupFields(); + groupFields.setGroupByFields(Sets.newHashSet("AG", "GEN")); + groupFields.setSumFields(Sets.newHashSet("AG")); + groupFields.setCountFields(Sets.newHashSet("NOME")); + groupFields.setAverageFields(Sets.newHashSet("AG")); + groupFields.setMinFields(Sets.newHashSet("GEN")); + groupFields.setMaxFields(Sets.newHashSet("NOME")); + + groupFields.remapFields(inverseReverseModel, reverseModel); + + assertThat(groupFields.toString()).isEqualTo( + "GROUP(GEN,AG)|SUM(AG)|COUNT(NOME)|AVERAGE(AG)|MIN(GEN)|MAX(NOME)|REVERSE_MODEL_MAP(GENERE=GEN:GENDER=GEN:AGE=AG:NAME=NOME)"); + } + + @Test + public void testParsingFromNullString() { + assertThat(GroupFields.from(null)).isNull(); + } + + @Test + public void testParsingFromEmptyString() { + assertThat(GroupFields.from("")).isEqualTo(new GroupFields()); + } + + @Test + public void testParsingFromWhitespace() { + 
assertThat(GroupFields.from(" ")).isEqualTo(new GroupFields()); + } + + @Test + public void testParsingGroupFieldsWithGroupByFieldsOnly() { + GroupFields expected = new GroupFields(); + expected.setGroupByFields(Sets.newHashSet("AGE", "GENDER")); + + GroupFields actual = GroupFields.from("GROUP(AGE,GENDER)"); + + assertThat(actual).isEqualTo(expected); + } + + @Test + public void testParsingGroupFieldsWithSomeAggregationFields() { + GroupFields expected = new GroupFields(); + expected.setGroupByFields(Sets.newHashSet("AGE", "GENDER")); + expected.setSumFields(Sets.newHashSet("AGE")); + expected.setMaxFields(Sets.newHashSet("NAME")); + + GroupFields actual = GroupFields.from("GROUP(AGE,GENDER)|SUM(AGE)|MAX(NAME)"); + + assertThat(actual).isEqualTo(expected); + } + + @Test + public void testParsingGroupFieldsWithAllAggregationFields() { + GroupFields expected = new GroupFields(); + expected.setGroupByFields(Sets.newHashSet("AGE", "GENDER")); + expected.setSumFields(Sets.newHashSet("BAT")); + expected.setCountFields(Sets.newHashSet("FOO")); + expected.setAverageFields(Sets.newHashSet("BAR")); + expected.setMinFields(Sets.newHashSet("HAT")); + expected.setMaxFields(Sets.newHashSet("BAH")); + + GroupFields actual = GroupFields.from("GROUP(AGE,GENDER)|SUM(BAT)|COUNT(FOO)|AVERAGE(BAR)|MIN(HAT)|MAX(BAH)"); + + assertThat(actual).isEqualTo(expected); + } + + @Test + public void testParsingRemappedGroupFields() { + GroupFields expected = new GroupFields(); + expected.setGroupByFields(Sets.newHashSet("AG")); + expected.setSumFields(Sets.newHashSet("AG")); + expected.setCountFields(Sets.newHashSet("NOME")); + expected.setAverageFields(Sets.newHashSet("BAR")); + expected.setMinFields(Sets.newHashSet("BAT")); + expected.setMaxFields(Sets.newHashSet("FOO")); + expected.remapFields(inverseReverseModel, reverseModel); + + GroupFields actual = GroupFields.from("GROUP(AG)|SUM(AG)|COUNT(NOME)|AVERAGE(BAR)|MIN(BAT)|MAX(FOO)|REVERSE_MODEL_MAP(AGE=AG:NAME=NOME)"); + + assertThat(actual).isEqualTo(expected); + } + + @Test + public void testParsingLegacyFormat() { + GroupFields expected = new GroupFields(); + expected.setGroupByFields(Sets.newHashSet("AGE", "GENDER", "NAME")); + + GroupFields actual = GroupFields.from("AGE,GENDER,NAME"); + + assertThat(actual).isEqualTo(expected); + } + + @Test + public void testDeconstructIdentifiers() { + GroupFields groupFields = new GroupFields(); + groupFields.setGroupByFields(Sets.newHashSet("$AGE", "$GENDER")); + groupFields.setSumFields(Sets.newHashSet("$AGE", "$GENDER")); + groupFields.setCountFields(Sets.newHashSet("$AGE", "$GENDER")); + groupFields.setAverageFields(Sets.newHashSet("$AGE", "$GENDER")); + groupFields.setMinFields(Sets.newHashSet("$AGE", "$GENDER")); + groupFields.setMaxFields(Sets.newHashSet("$AGE", "$GENDER")); + + groupFields.deconstructIdentifiers(); + + assertThat(groupFields.getGroupByFields()).containsExactlyInAnyOrder("AGE", "GENDER"); + assertThat(groupFields.getSumFields()).containsExactlyInAnyOrder("AGE", "GENDER"); + assertThat(groupFields.getCountFields()).containsExactlyInAnyOrder("AGE", "GENDER"); + assertThat(groupFields.getMinFields()).containsExactlyInAnyOrder("AGE", "GENDER"); + assertThat(groupFields.getMaxFields()).containsExactlyInAnyOrder("AGE", "GENDER"); + assertThat(groupFields.getAverageFields()).containsExactlyInAnyOrder("AGE", "GENDER"); + } + + @Test + public void testRemapFields() { + GroupFields groupFields = new GroupFields(); + groupFields.setGroupByFields(Sets.newHashSet("AG", "GEN")); + 
groupFields.setSumFields(Sets.newHashSet("AG")); + groupFields.setCountFields(Sets.newHashSet("NOME")); + groupFields.setAverageFields(Sets.newHashSet("AG")); + groupFields.setMinFields(Sets.newHashSet("GEN")); + groupFields.setMaxFields(Sets.newHashSet("NOME")); + + groupFields.remapFields(inverseReverseModel, reverseModel); + + assertThat(groupFields.getGroupByFields()).containsExactlyInAnyOrder("GEN", "AG"); + assertThat(groupFields.getSumFields()).containsExactlyInAnyOrder("AG"); + assertThat(groupFields.getCountFields()).containsExactlyInAnyOrder("NOME"); + assertThat(groupFields.getAverageFields()).containsExactlyInAnyOrder("AG"); + assertThat(groupFields.getMinFields()).containsExactlyInAnyOrder("GEN"); + assertThat(groupFields.getMaxFields()).containsExactlyInAnyOrder("NOME"); + assertThat(groupFields.getReverseModelMap()).containsEntry("AGE", "AG").containsEntry("GENDER", "GEN").containsEntry("GENERE", "GEN") + .containsEntry("NAME", "NOME").hasSize(4); + } + + @Test + public void testSerialization() throws JsonProcessingException { + GroupFields groupFields = new GroupFields(); + groupFields.setGroupByFields(Sets.newHashSet("AG", "GEN")); + groupFields.setSumFields(Sets.newHashSet("AG")); + groupFields.setCountFields(Sets.newHashSet("NOME")); + groupFields.setAverageFields(Sets.newHashSet("AG")); + groupFields.setMinFields(Sets.newHashSet("GEN")); + groupFields.setMaxFields(Sets.newHashSet("NOME")); + + groupFields.remapFields(inverseReverseModel, reverseModel); + + String json = objectMapper.writeValueAsString(groupFields); + assertThat(json).isEqualTo( + "\"GROUP(GEN,AG)|SUM(AG)|COUNT(NOME)|AVERAGE(AG)|MIN(GEN)|MAX(NOME)|REVERSE_MODEL_MAP(GENERE=GEN:GENDER=GEN:AGE=AG:NAME=NOME)\""); + } + + @Test + public void testDeserialization() throws JsonProcessingException { + GroupFields expected = new GroupFields(); + expected.setGroupByFields(Sets.newHashSet("AG", "GEN")); + expected.setSumFields(Sets.newHashSet("AG")); + expected.setCountFields(Sets.newHashSet("NOME")); + expected.setAverageFields(Sets.newHashSet("AG")); + expected.setMinFields(Sets.newHashSet("GEN")); + expected.setMaxFields(Sets.newHashSet("NOME")); + expected.remapFields(inverseReverseModel, reverseModel); + + String json = "\"GROUP(GEN,AG)|SUM(AG)|COUNT(NOME)|AVERAGE(AG)|MIN(GEN)|MAX(NOME)|REVERSE_MODEL_MAP(GENERE=GEN:GENDER=GEN:AGE=AG:NAME=NOME)\""; + GroupFields actual = objectMapper.readValue(json, GroupFields.class); + + assertThat(actual).isEqualTo(expected); + } + + @Test + public void testGetFieldAggregatorFactory() { + GroupFields groupFields = new GroupFields(); + groupFields.setGroupByFields(Sets.newHashSet("AGE", "GENDER")); + groupFields.setSumFields(Sets.newHashSet("AGE")); + groupFields.setCountFields(Sets.newHashSet("NAME")); + groupFields.setAverageFields(Sets.newHashSet("HEIGHT")); + groupFields.setMinFields(Sets.newHashSet("SALARY")); + groupFields.setMaxFields(Sets.newHashSet("RANK")); + + // @formatter:off + FieldAggregator.Factory expected = new FieldAggregator.Factory().withSumFields("AGE") + .withCountFields("NAME") + .withAverageFields("HEIGHT") + .withMinFields("SALARY") + .withMaxFields("RANK"); + // @formatter:on + + assertThat(groupFields.getFieldAggregatorFactory()).isEqualTo(expected); + } +} diff --git a/warehouse/query-core/src/test/java/datawave/query/common/grouping/MaxAggregatorTest.java b/warehouse/query-core/src/test/java/datawave/query/common/grouping/MaxAggregatorTest.java new file mode 100644 index 0000000000..5c65b0fbee --- /dev/null +++ 
b/warehouse/query-core/src/test/java/datawave/query/common/grouping/MaxAggregatorTest.java
@@ -0,0 +1,123 @@
+package datawave.query.common.grouping;
+
+import static org.junit.Assert.assertEquals;
+
+import org.apache.accumulo.core.data.Key;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import datawave.query.attributes.Attribute;
+import datawave.query.attributes.Content;
+import datawave.query.attributes.DateContent;
+import datawave.query.attributes.DiacriticContent;
+import datawave.query.attributes.Numeric;
+
+/**
+ * Tests for {@link MaxAggregator}.
+ */
+public class MaxAggregatorTest {
+
+    private MaxAggregator aggregator;
+
+    @Before
+    public void setUp() throws Exception {
+        aggregator = new MaxAggregator("FIELD");
+    }
+
+    /**
+     * Verify that the initial max is null.
+     */
+    @Test
+    public void testInitialMax() {
+        assertMax(null);
+    }
+
+    /**
+     * Verify that an exception is thrown if given a value of a different type than the current max.
+     */
+    @Test
+    public void testConflictingTypes() {
+        Content first = createContent("aaa");
+        aggregator.aggregate(first);
+        assertMax(first);
+
+        DiacriticContent diacriticContent = new DiacriticContent("different content type", new Key(), true);
+        IllegalArgumentException exception = Assert.assertThrows(IllegalArgumentException.class, () -> aggregator.aggregate(diacriticContent));
+        assertEquals("Failed to compare current max 'aaa' to new value 'different content type'", exception.getMessage());
+    }
+
+    /**
+     * Verify that finding the max of string values works.
+     */
+    @Test
+    public void testStringAggregation() {
+        Content content = createContent("a");
+        aggregator.aggregate(content);
+        assertMax(content);
+
+        // Verify the max updated.
+        content = createContent("d");
+        aggregator.aggregate(content);
+        assertMax(content);
+
+        // Verify the max did not change.
+        aggregator.aggregate(createContent("b"));
+        assertMax(content);
+    }
+
+    /**
+     * Verify that finding the max of number values works.
+     */
+    @Test
+    public void testNumericAggregation() {
+        Numeric numeric = createNumeric("1.5");
+        aggregator.aggregate(numeric);
+        assertMax(numeric);
+
+        // Verify the max updated.
+        numeric = createNumeric("10");
+        aggregator.aggregate(numeric);
+        assertMax(numeric);
+
+        // Verify the max did not change.
+        aggregator.aggregate(createNumeric("6"));
+        assertMax(numeric);
+    }
+
+    /**
+     * Verify that finding the max of date values works.
+     */
+    @Test
+    public void testDateAggregation() {
+        DateContent dateContent = createDateContent("20221201120000");
+        aggregator.aggregate(dateContent);
+        assertMax(dateContent);
+
+        // Verify the max updated.
+        dateContent = createDateContent("20251201120000");
+        aggregator.aggregate(dateContent);
+        assertMax(dateContent);
+
+        // Verify the max did not change.
+ aggregator.aggregate(createDateContent("20231201120000")); + assertMax(dateContent); + } + + private Content createContent(String content) { + return new Content(content, new Key(), true); + } + + private Numeric createNumeric(String number) { + return new Numeric(number, new Key(), true); + } + + private DateContent createDateContent(String date) { + return new DateContent(date, new Key(), true); + } + + private void assertMax(Attribute expected) { + + assertEquals(expected, aggregator.getAggregation()); + } +} diff --git a/warehouse/query-core/src/test/java/datawave/query/common/grouping/MinAggregatorTest.java b/warehouse/query-core/src/test/java/datawave/query/common/grouping/MinAggregatorTest.java new file mode 100644 index 0000000000..c573d00b51 --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/query/common/grouping/MinAggregatorTest.java @@ -0,0 +1,119 @@ +package datawave.query.common.grouping; + +import static org.junit.Assert.assertEquals; + +import org.apache.accumulo.core.data.Key; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import datawave.query.attributes.Attribute; +import datawave.query.attributes.Content; +import datawave.query.attributes.DateContent; +import datawave.query.attributes.DiacriticContent; +import datawave.query.attributes.Numeric; + +public class MinAggregatorTest { + + private MinAggregator aggregator; + + @Before + public void setUp() throws Exception { + aggregator = new MinAggregator("FIELD"); + } + + /** + * Verify that the initial min is null. + */ + @Test + public void testInitialMin() { + assertMin(null); + } + + /** + * Verify that if given a value that is of a different type than the current min, that an exception is thrown. + */ + @Test + public void testConflictingTypes() { + Content first = createContent("aaa"); + aggregator.aggregate(first); + assertMin(first); + + DiacriticContent diacriticContent = new DiacriticContent("different content type", new Key(), true); + IllegalArgumentException exception = Assert.assertThrows(IllegalArgumentException.class, () -> aggregator.aggregate(diacriticContent)); + assertEquals("Failed to compare current min 'aaa' to new value 'different content type'", exception.getMessage()); + } + + /** + * Verify that finding the min of string values works. + */ + @Test + public void testStringAggregation() { + Content content = createContent("d"); + aggregator.aggregate(content); + assertMin(content); + + // Verify the min updated. + content = createContent("a"); + aggregator.aggregate(content); + assertMin(content); + + // Verify the min did not change. + aggregator.aggregate(createContent("b")); + assertMin(content); + } + + /** + * Verify that finding the min of number values works. + */ + @Test + public void testNumericAggregation() { + Numeric numeric = createNumeric("10"); + aggregator.aggregate(numeric); + assertMin(numeric); + + // Verify the max updated. + numeric = createNumeric("1.5"); + aggregator.aggregate(numeric); + assertMin(numeric); + + // Verify the max did not change. + aggregator.aggregate(createNumeric("6")); + assertMin(numeric); + } + + /** + * Verify that finding the min of date values work. + */ + @Test + public void testDateAggregation() { + DateContent dateContent = createDateContent("20251201120000"); + aggregator.aggregate(dateContent); + assertMin(dateContent); + + // Verify the max updated. 
+ dateContent = createDateContent("20221201120000"); + aggregator.aggregate(dateContent); + assertMin(dateContent); + + // Verify the max did not change. + aggregator.aggregate(createDateContent("20231201120000")); + assertMin(dateContent); + } + + private Content createContent(String content) { + return new Content(content, new Key(), true); + } + + private Numeric createNumeric(String number) { + return new Numeric(number, new Key(), true); + } + + private DateContent createDateContent(String date) { + return new DateContent(date, new Key(), true); + } + + private void assertMin(Attribute expected) { + assertEquals(expected, aggregator.getAggregation()); + } +} diff --git a/warehouse/query-core/src/test/java/datawave/query/common/grouping/SumAggregatorTest.java b/warehouse/query-core/src/test/java/datawave/query/common/grouping/SumAggregatorTest.java new file mode 100644 index 0000000000..b99d1d1d1a --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/query/common/grouping/SumAggregatorTest.java @@ -0,0 +1,74 @@ +package datawave.query.common.grouping; + +import static org.junit.Assert.assertEquals; + +import java.math.BigDecimal; + +import org.apache.accumulo.core.data.Key; +import org.apache.hadoop.thirdparty.org.checkerframework.checker.units.qual.K; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import datawave.data.type.NumberType; +import datawave.data.type.Type; +import datawave.query.attributes.Content; +import datawave.query.attributes.Numeric; +import datawave.query.attributes.TypeAttribute; + +/** + * Tests for {@link SumAggregator}. + */ +public class SumAggregatorTest { + + private SumAggregator aggregator; + + @Before + public void setUp() throws Exception { + aggregator = new SumAggregator("FIELD"); + } + + /** + * Verify the initial sum is 0. + */ + @Test + public void testInitialSum() { + assertSum(null); + } + + /** + * Verify that if given a non-numeric value, that an exception is thrown. + */ + @Test + public void testNonNumericValue() { + Content content = new Content("i am content", new Key(), true); + + IllegalArgumentException exception = Assert.assertThrows(IllegalArgumentException.class, () -> aggregator.aggregate(content)); + assertEquals("Unable to calculate a sum with non-numerical value 'i am content'", exception.getMessage()); + } + + /** + * Verify that given additional numeric values, that the sum is correctly calculated. 
+ */ + @Test + public void testAggregation() { + aggregator.aggregate(createNumeric("4")); + assertSum(new BigDecimal("4")); + + aggregator.aggregate(createNumeric("1")); + aggregator.aggregate(createNumeric("1")); + assertSum(new BigDecimal("6")); + + aggregator.aggregate(createNumeric("4.5")); + assertSum(new BigDecimal("10.5")); + } + + private TypeAttribute createNumeric(String number) { + Type type = new NumberType(number); + return new TypeAttribute<>(type, new Key(), true); + } + + private void assertSum(BigDecimal sum) { + assertEquals(sum, aggregator.getAggregation()); + } +} diff --git a/warehouse/query-core/src/test/java/datawave/query/config/ShardQueryConfigurationTest.java b/warehouse/query-core/src/test/java/datawave/query/config/ShardQueryConfigurationTest.java index 84fbd34378..7711eabfa7 100644 --- a/warehouse/query-core/src/test/java/datawave/query/config/ShardQueryConfigurationTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/config/ShardQueryConfigurationTest.java @@ -35,6 +35,7 @@ import datawave.query.DocumentSerialization; import datawave.query.attributes.ExcerptFields; import datawave.query.attributes.UniqueFields; +import datawave.query.common.grouping.GroupFields; import datawave.query.iterator.ivarator.IvaratorCacheDirConfig; import datawave.query.iterator.logic.TermFrequencyExcerptIterator; import datawave.query.iterator.logic.TermFrequencyIndexIterator; @@ -523,11 +524,8 @@ public void setUp() throws Exception { updatedValues.put("groupFieldsBatchSizeAsString", "5"); alreadySet.add("groupFieldsBatchSizeAsString"); - defaultValues.put("groupFields", Sets.newHashSet()); - updatedValues.put("groupFields", Sets.newHashSet("FIELD_G", "FIELD_H")); - defaultValues.put("groupFieldsAsString", ""); - updatedValues.put("groupFieldsAsString", "FIELD_G,FIELD_H"); - alreadySet.add("groupFieldsAsString"); + defaultValues.put("groupFields", new GroupFields()); + updatedValues.put("groupFields", GroupFields.from("GROUP(FIELD_G,FIELD_H)")); } private Query createQuery(String query) { diff --git a/warehouse/query-core/src/test/java/datawave/query/transformer/GroupingTest.java b/warehouse/query-core/src/test/java/datawave/query/transformer/GroupingTest.java index b079daedb3..53d74a35ea 100644 --- a/warehouse/query-core/src/test/java/datawave/query/transformer/GroupingTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/transformer/GroupingTest.java @@ -1,72 +1,70 @@ package datawave.query.transformer; -import static datawave.query.RebuildingScannerTestHelper.TEARDOWN.ALWAYS; -import static datawave.query.RebuildingScannerTestHelper.TEARDOWN.ALWAYS_SANS_CONSISTENCY; -import static datawave.query.RebuildingScannerTestHelper.TEARDOWN.EVERY_OTHER; -import static datawave.query.RebuildingScannerTestHelper.TEARDOWN.EVERY_OTHER_SANS_CONSISTENCY; -import static datawave.query.RebuildingScannerTestHelper.TEARDOWN.NEVER; -import static datawave.query.RebuildingScannerTestHelper.TEARDOWN.RANDOM; -import static datawave.query.RebuildingScannerTestHelper.TEARDOWN.RANDOM_SANS_CONSISTENCY; +import static org.assertj.core.api.Assertions.assertThat; import java.text.DateFormat; +import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; import java.util.Collections; import java.util.Date; +import java.util.EnumSet; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Set; +import java.util.SortedMap; +import 
java.util.SortedSet; import java.util.TimeZone; +import java.util.TreeMap; +import java.util.TreeSet; import java.util.UUID; +import java.util.function.BiConsumer; +import java.util.stream.Collectors; -import javax.enterprise.inject.Produces; import javax.inject.Inject; import org.apache.accumulo.core.client.AccumuloClient; -import org.apache.accumulo.core.data.Key; import org.apache.accumulo.core.security.Authorizations; -import org.apache.accumulo.core.security.ColumnVisibility; -import org.apache.commons.collections4.iterators.TransformIterator; +import org.apache.commons.lang.builder.ToStringBuilder; import org.apache.log4j.Logger; -import org.easymock.EasyMock; +import org.assertj.core.api.Assertions; +import org.assertj.core.util.Sets; import org.jboss.arquillian.container.test.api.Deployment; import org.jboss.arquillian.junit.Arquillian; import org.jboss.shrinkwrap.api.ShrinkWrap; import org.jboss.shrinkwrap.api.asset.StringAsset; import org.jboss.shrinkwrap.api.spec.JavaArchive; +import org.junit.After; import org.junit.AfterClass; -import org.junit.Assert; import org.junit.Before; -import org.junit.Rule; +import org.junit.BeforeClass; import org.junit.Test; -import org.junit.rules.TemporaryFolder; import org.junit.runner.RunWith; +import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.MapperFeature; import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.collect.ImmutableMap; +import com.fasterxml.jackson.databind.ObjectWriter; +import com.google.common.collect.ImmutableSet; import com.google.common.collect.Lists; -import com.google.common.collect.Sets; import datawave.configuration.spring.SpringBean; -import datawave.data.type.LcType; -import datawave.data.type.NumberType; import datawave.helpers.PrintUtility; import datawave.ingest.data.TypeRegistry; -import datawave.marking.MarkingFunctions; +import datawave.query.QueryParameters; import datawave.query.QueryTestTableHelper; import datawave.query.RebuildingScannerTestHelper; -import datawave.query.attributes.Attribute; -import datawave.query.common.grouping.GroupingUtil.GroupCountingHashMap; -import datawave.query.common.grouping.GroupingUtil.GroupingTypeAttribute; +import datawave.query.common.grouping.AggregateOperation; +import datawave.query.common.grouping.DocumentGrouper; import datawave.query.function.deserializer.KryoDocumentDeserializer; -import datawave.query.language.parser.jexl.JexlControlledQueryParser; +import datawave.query.iterator.QueryOptions; import datawave.query.language.parser.jexl.LuceneToJexlQueryParser; import datawave.query.tables.ShardQueryLogic; +import datawave.query.tables.edge.DefaultEdgeEventQueryLogic; import datawave.query.util.VisibilityWiseGuysIngest; +import datawave.query.util.VisibilityWiseGuysIngestWithModel; import datawave.util.TableName; import datawave.webservice.edgedictionary.RemoteEdgeDictionary; import datawave.webservice.query.QueryImpl; @@ -74,50 +72,19 @@ import datawave.webservice.query.iterator.DatawaveTransformIterator; import datawave.webservice.query.result.event.EventBase; import datawave.webservice.query.result.event.FieldBase; -import datawave.webservice.result.BaseQueryResponse; import datawave.webservice.result.DefaultEventQueryResponse; -import datawave.webservice.result.EventQueryResponseBase; /** - * Applies grouping to queries - * + * Applies grouping to queries. 
*/ public abstract class GroupingTest { - private static final Logger log = Logger.getLogger(GroupingTest.class); - - private static final String COLVIS_MARKING = "columnVisibility"; - private static final String EXPECTED_COLVIS = "ALL&E&I"; - - private static Authorizations auths = new Authorizations("ALL", "E", "I"); - - // @formatter:off - private static final List TEARDOWNS = Lists.newArrayList( - NEVER, - ALWAYS, - ALWAYS_SANS_CONSISTENCY, - RANDOM, - RANDOM_SANS_CONSISTENCY, - EVERY_OTHER, - EVERY_OTHER_SANS_CONSISTENCY - ); - private static final List INTERRUPTS = Arrays.asList(RebuildingScannerTestHelper.INTERRUPT.values()); - // @formatter:on - @RunWith(Arquillian.class) public static class ShardRange extends GroupingTest { @Override - protected BaseQueryResponse runTestQueryWithGrouping(Map expected, String querystr, Date startDate, Date endDate, - Map extraParms, RebuildingScannerTestHelper.TEARDOWN teardown, RebuildingScannerTestHelper.INTERRUPT interrupt) - throws Exception { - QueryTestTableHelper qtth = new QueryTestTableHelper(ShardRange.class.getName(), log, teardown, interrupt); - AccumuloClient client = qtth.client; - VisibilityWiseGuysIngest.writeItAll(client, VisibilityWiseGuysIngest.WhatKindaRange.SHARD); - PrintUtility.printTable(client, auths, TableName.SHARD); - PrintUtility.printTable(client, auths, TableName.SHARD_INDEX); - PrintUtility.printTable(client, auths, QueryTestTableHelper.MODEL_TABLE_NAME); - return super.runTestQueryWithGrouping(expected, querystr, startDate, endDate, extraParms, client); + protected String getRange() { + return "SHARD"; } } @@ -125,535 +92,916 @@ protected BaseQueryResponse runTestQueryWithGrouping(Map expecte public static class DocumentRange extends GroupingTest { @Override - protected BaseQueryResponse runTestQueryWithGrouping(Map expected, String querystr, Date startDate, Date endDate, - Map extraParms, RebuildingScannerTestHelper.TEARDOWN teardown, RebuildingScannerTestHelper.INTERRUPT interrupt) - throws Exception { - QueryTestTableHelper qtth = new QueryTestTableHelper(DocumentRange.class.toString(), log, teardown, interrupt); - AccumuloClient client = qtth.client; - VisibilityWiseGuysIngest.writeItAll(client, VisibilityWiseGuysIngest.WhatKindaRange.DOCUMENT); - PrintUtility.printTable(client, auths, TableName.SHARD); - PrintUtility.printTable(client, auths, TableName.SHARD_INDEX); - PrintUtility.printTable(client, auths, QueryTestTableHelper.MODEL_TABLE_NAME); - return super.runTestQueryWithGrouping(expected, querystr, startDate, endDate, extraParms, client); + protected String getRange() { + return "DOCUMENT"; + } + } + + private static class QueryResult { + private static final ObjectWriter writer = new ObjectMapper().enable(MapperFeature.USE_WRAPPER_NAME_AS_PROPERTY_NAME).writerWithDefaultPrettyPrinter(); + + private final RebuildingScannerTestHelper.TEARDOWN teardown; + private final RebuildingScannerTestHelper.INTERRUPT interrupt; + private final DefaultEventQueryResponse response; + private final String json; + + private QueryResult(RebuildingScannerTestHelper.TEARDOWN teardown, RebuildingScannerTestHelper.INTERRUPT interrupt, DefaultEventQueryResponse response) + throws JsonProcessingException { + this.teardown = teardown; + this.interrupt = interrupt; + this.response = response; + this.json = writer.writeValueAsString(response); + } + } + + private static class Group { + private final SortedSet groupValues; + private final SortedMap> aggregateValues = new TreeMap<>(); + private int count; + + public static Group 
of(String... values) { + return new Group(values); + } + + public Group() { + this.groupValues = new TreeSet<>(); + } + + public Group(String... values) { + this.groupValues = Sets.newTreeSet(values); + } + + public void addGroupValue(String value) { + this.groupValues.add(value); + } + + public Group withCount(int count) { + this.count = count; + return this; + } + + public Group withAggregate(Aggregate field) { + this.aggregateValues.put(field.field, field.values); + return this; + } + + public Group withFieldSum(String field, String sum) { + putAggregate(field, AggregateOperation.SUM, sum); + return this; + } + + public Group withFieldMax(String field, String max) { + putAggregate(field, AggregateOperation.MAX, max); + return this; + } + + public Group withFieldMin(String field, String min) { + putAggregate(field, AggregateOperation.MIN, min); + return this; + } + + public Group withFieldCount(String field, String count) { + putAggregate(field, AggregateOperation.COUNT, count); + return this; + } + + public Group withFieldAverage(String field, String average) { + putAggregate(field, AggregateOperation.AVERAGE, average); + return this; + } + + private void putAggregate(String field, AggregateOperation operation, String value) { + Map map = aggregateValues.computeIfAbsent(field, k -> new TreeMap<>()); + map.put(operation, value); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + Group group = (Group) o; + return count == group.count && Objects.equals(groupValues, group.groupValues) && Objects.equals(aggregateValues, group.aggregateValues); + } + + @Override + public int hashCode() { + return Objects.hash(groupValues, count, aggregateValues); + } + + @Override + public String toString() { + return new ToStringBuilder(this).append("groupValues", groupValues).append("aggregateValues", aggregateValues).append("count", count).toString(); } } - @Rule - public TemporaryFolder temporaryFolder = new TemporaryFolder(); + private static class Aggregate { + private final String field; + private final SortedMap values = new TreeMap<>(); + + public static Aggregate of(String field) { + return new Aggregate(field); + } + + public Aggregate(String field) { + this.field = field; + } + + public Aggregate withSum(String sum) { + values.put(AggregateOperation.SUM, sum); + return this; + } + + public Aggregate withAverage(String average) { + values.put(AggregateOperation.AVERAGE, average); + return this; + } + + public Aggregate withCount(String count) { + values.put(AggregateOperation.COUNT, count); + return this; + } - protected Set authSet = Collections.singleton(auths); + public Aggregate withMin(String min) { + values.put(AggregateOperation.MIN, min); + return this; + } + + public Aggregate withMax(String max) { + values.put(AggregateOperation.MAX, max); + return this; + } + } + + private static final String COUNT_FIELD = "COUNT"; + private static final Set FIELDS_OF_INTEREST = ImmutableSet.of("GENDER", "GEN", "BIRTHDAY", "AGE", "AG", "RECORD"); + private static final Logger log = Logger.getLogger(GroupingTest.class); + private static final String COLVIS_MARKING = "columnVisibility"; + private static final String REDUCED_COLVIS = "ALL&E&I"; + private static final Authorizations auths = new Authorizations("ALL", "E", "I"); + private static final EnumSet TEARDOWNS = EnumSet.allOf(RebuildingScannerTestHelper.TEARDOWN.class); + private static final EnumSet INTERRUPTS = 
EnumSet.allOf(RebuildingScannerTestHelper.INTERRUPT.class); + private static final Set authSet = Collections.singleton(auths); @Inject @SpringBean(name = "EventQuery") protected ShardQueryLogic logic; - protected KryoDocumentDeserializer deserializer; private final DateFormat format = new SimpleDateFormat("yyyyMMdd"); + private final Map queryParameters = new HashMap<>(); + private final Map,Group> expectedGroups = new HashMap<>(); + private final List queryResults = new ArrayList<>(); + + private String query; + private Date startDate; + private Date endDate; + private BiConsumer dataWriter; @Deployment public static JavaArchive createDeployment() { - return ShrinkWrap.create(JavaArchive.class) .addPackages(true, "org.apache.deltaspike", "io.astefanutti.metrics.cdi", "datawave.query", "org.jboss.logging", "datawave.webservice.query.result.event") + .deleteClass(DefaultEdgeEventQueryLogic.class).deleteClass(RemoteEdgeDictionary.class) .deleteClass(datawave.query.metrics.QueryMetricQueryLogic.class).deleteClass(datawave.query.metrics.ShardTableQueryMetricHandler.class) .addAsManifestResource(new StringAsset( "" + "datawave.query.tables.edge.MockAlternative" + ""), "beans.xml"); } + @BeforeClass + public static void beforeClass() { + TimeZone.setDefault(TimeZone.getTimeZone("GMT")); + } + + @Before + public void setup() throws ParseException { + this.logic.setFullTableScanEnabled(true); + this.logic.setMaxEvaluationPipelines(1); + this.logic.setQueryExecutionForPageTimeout(300000000000000L); + this.deserializer = new KryoDocumentDeserializer(); + this.startDate = format.parse("20091231"); + this.endDate = format.parse("20150101"); + } + + @After + public void tearDown() { + this.queryParameters.clear(); + this.query = null; + this.startDate = null; + this.endDate = null; + this.expectedGroups.clear(); + this.queryResults.clear(); + this.dataWriter = null; + } + @AfterClass public static void teardown() { TypeRegistry.reset(); } - @Before - public void setup() { - TimeZone.setDefault(TimeZone.getTimeZone("GMT")); + protected abstract String getRange(); + + private void givenQuery(String query) { + this.query = query; + } + + private void givenQueryParameter(String key, String value) { + this.queryParameters.put(key, value); + } + + private void expectGroup(Group group) { + expectedGroups.put(group.groupValues, group); + } + + private void givenLuceneParserForLogic() { + logic.setParser(new LuceneToJexlQueryParser()); + } - logic.setFullTableScanEnabled(true); - logic.setMaxEvaluationPipelines(1); - logic.setQueryExecutionForPageTimeout(300000000000000L); - deserializer = new KryoDocumentDeserializer(); + private void givenNonModelData() { + dataWriter = (client, range) -> { + try { + VisibilityWiseGuysIngest.writeItAll(client, range); + } catch (Exception e) { + throw new RuntimeException(e); + } + }; } - protected abstract BaseQueryResponse runTestQueryWithGrouping(Map expected, String querystr, Date startDate, Date endDate, - Map extraParms, RebuildingScannerTestHelper.TEARDOWN teardown, RebuildingScannerTestHelper.INTERRUPT interrupt) - throws Exception; + private void givenModelData() { + dataWriter = (client, range) -> { + try { + VisibilityWiseGuysIngestWithModel.writeItAll(client, range); + } catch (Exception e) { + throw new RuntimeException(e); + } + }; + } - protected BaseQueryResponse runTestQueryWithGrouping(Map expected, String querystr, Date startDate, Date endDate, - Map extraParms, AccumuloClient client) throws Exception { - log.debug("runTestQueryWithGrouping"); + private 
void assertGroups() { + for (QueryResult result : queryResults) { + Map,Group> actualGroups = new HashMap<>(); + // noinspection rawtypes + for (EventBase event : result.response.getEvents()) { + Group group = new Group(); + for (Object field : event.getFields()) { + FieldBase fieldBase = (FieldBase) field; + String fieldName = fieldBase.getName(); + if (fieldName.equals(COUNT_FIELD)) { + group.withCount(Integer.parseInt(fieldBase.getValueString())); + } else if (FIELDS_OF_INTEREST.contains(fieldName)) { + group.addGroupValue(fieldBase.getValueString()); + } else if (fieldName.endsWith(DocumentGrouper.FIELD_SUM_SUFFIX)) { + fieldName = removeSuffix(fieldName, DocumentGrouper.FIELD_SUM_SUFFIX); + group.withFieldSum(fieldName, fieldBase.getValueString()); + } else if (fieldName.endsWith(DocumentGrouper.FIELD_MAX_SUFFIX)) { + fieldName = removeSuffix(fieldName, DocumentGrouper.FIELD_MAX_SUFFIX); + group.withFieldMax(fieldName, fieldBase.getValueString()); + } else if (fieldName.endsWith(DocumentGrouper.FIELD_MIN_SUFFIX)) { + fieldName = removeSuffix(fieldName, DocumentGrouper.FIELD_MIN_SUFFIX); + group.withFieldMin(fieldName, fieldBase.getValueString()); + } else if (fieldName.endsWith(DocumentGrouper.FIELD_COUNT_SUFFIX)) { + fieldName = removeSuffix(fieldName, DocumentGrouper.FIELD_COUNT_SUFFIX); + group.withFieldCount(fieldName, fieldBase.getValueString()); + } else if (fieldName.endsWith(DocumentGrouper.FIELD_AVERAGE_SUFFIX)) { + fieldName = removeSuffix(fieldName, DocumentGrouper.FIELD_AVERAGE_SUFFIX); + group.withFieldAverage(fieldName, fieldBase.getValueString()); + } + } + actualGroups.put(group.groupValues, group); + } + assertThat(actualGroups).describedAs("Assert group for teardown: %s, interrupt: %s", result.teardown, result.interrupt) + .containsExactlyInAnyOrderEntriesOf(expectedGroups); + } + } + private String removeSuffix(String str, String suffix) { + int suffixLength = suffix.length(); + return str.substring(0, str.length() - suffixLength); + } + + private void assertResponseEventsAreIdenticalForAllTestResults() { + RebuildingScannerTestHelper.TEARDOWN prevTeardown = null; + RebuildingScannerTestHelper.INTERRUPT prevInterrupt = null; + String prevEvents = null; + + for (QueryResult result : queryResults) { + DefaultEventQueryResponse response = result.response; + String events = getEventFieldNamesAndValues(response); + if (prevEvents != null) { + assertThat(events) + .describedAs("Assert events are identical between result from (teardown: %s, interrupt: %s) and (teardown: %s, interrupt: %s)", + result.teardown, result.interrupt, prevTeardown, prevInterrupt) + .isEqualTo(prevEvents); + } + prevEvents = events; + prevTeardown = result.teardown; + prevInterrupt = result.interrupt; + } + } + + private String getEventFieldNamesAndValues(DefaultEventQueryResponse response) { + // @formatter:off + //noinspection unchecked + return response.getEvents().stream().map((event) -> ((List>)event.getFields())) + .flatMap(List::stream) + .map((field) -> field.getName() + ":" + field.getTypedValue().getValue()) + .collect(Collectors.joining(",")); + // @formatter:on + } + + private void collectQueryResults() throws Exception { + for (RebuildingScannerTestHelper.TEARDOWN teardown : TEARDOWNS) { + for (RebuildingScannerTestHelper.INTERRUPT interrupt : INTERRUPTS) { + queryResults.add(getQueryResult(teardown, interrupt)); + } + } + } + + private QueryResult getQueryResult(RebuildingScannerTestHelper.TEARDOWN teardown, RebuildingScannerTestHelper.INTERRUPT interrupt) throws Exception { + // 
Initialize the query settings. QueryImpl settings = new QueryImpl(); - settings.setBeginDate(startDate); - settings.setEndDate(endDate); + settings.setBeginDate(this.startDate); + settings.setEndDate(this.endDate); settings.setPagesize(Integer.MAX_VALUE); settings.setQueryAuthorizations(auths.serialize()); - settings.setQuery(querystr); - settings.setParameters(extraParms); + settings.setQuery(this.query); + settings.setParameters(this.queryParameters); settings.setId(UUID.randomUUID()); log.debug("query: " + settings.getQuery()); - log.debug("logic: " + settings.getQueryLogicName()); + log.debug("queryLogicName: " + settings.getQueryLogicName()); + // Initialize the query logic. + AccumuloClient client = createClient(teardown, interrupt); GenericQueryConfiguration config = logic.initialize(client, settings, authSet); logic.setupQuery(config); + // Run the query and retrieve the response. DocumentTransformer transformer = (DocumentTransformer) (logic.getTransformer(settings)); - TransformIterator iter = new DatawaveTransformIterator(logic.iterator(), transformer); - List eventList = new ArrayList<>(); - while (iter.hasNext()) { - eventList.add(iter.next()); - } + List eventList = Lists.newArrayList(new DatawaveTransformIterator<>(logic.iterator(), transformer)); + DefaultEventQueryResponse response = ((DefaultEventQueryResponse) transformer.createResponse(eventList)); - BaseQueryResponse response = transformer.createResponse(eventList); - - // un-comment to look at the json output - ObjectMapper mapper = new ObjectMapper(); - mapper.enable(MapperFeature.USE_WRAPPER_NAME_AS_PROPERTY_NAME); - mapper.writeValue(temporaryFolder.newFile(), response); - - Assert.assertTrue(response instanceof DefaultEventQueryResponse); - DefaultEventQueryResponse eventQueryResponse = (DefaultEventQueryResponse) response; - - Assert.assertEquals("Got the wrong number of events", expected.size(), (long) eventQueryResponse.getReturnedEvents()); - - for (EventBase event : eventQueryResponse.getEvents()) { - - String firstKey = ""; - String secondKey = ""; - Integer value = null; - for (Object field : event.getFields()) { - FieldBase fieldBase = (FieldBase) field; - switch (fieldBase.getName()) { - case "COUNT": - value = Integer.valueOf(fieldBase.getValueString()); - break; - case "GENDER": - case "GEN": - case "BIRTHDAY": - firstKey = fieldBase.getValueString(); - break; - case "AGE": - case "AG": - case "RECORD": - secondKey = fieldBase.getValueString(); - break; - } - } + // Return the test result. 
+ return new QueryResult(teardown, interrupt, response); + } - log.debug("mapping is " + firstKey + "-" + secondKey + " count:" + value); - String key; - if (!firstKey.isEmpty() && !secondKey.isEmpty()) { - key = firstKey + "-" + secondKey; - } else if (!firstKey.isEmpty()) { - key = firstKey; - } else { - key = secondKey; - } - Assert.assertEquals(expected.get(key), value); - } - return response; + private AccumuloClient createClient(RebuildingScannerTestHelper.TEARDOWN teardown, RebuildingScannerTestHelper.INTERRUPT interrupt) throws Exception { + AccumuloClient client = new QueryTestTableHelper(getClass().toString(), log, teardown, interrupt).client; + dataWriter.accept(client, getRange()); + PrintUtility.printTable(client, auths, TableName.SHARD); + PrintUtility.printTable(client, auths, TableName.SHARD_INDEX); + PrintUtility.printTable(client, auths, QueryTestTableHelper.MODEL_TABLE_NAME); + return client; } @Test - public void testGrouping() throws Exception { - Map extraParameters = new HashMap<>(); + public void testGroupByAgeAndGenderWithBatchSizeOfSix() throws Exception { + givenNonModelData(); - Date startDate = format.parse("20091231"); - Date endDate = format.parse("20150101"); + givenQuery("UUID =~ '^[CS].*'"); - String queryString = "UUID =~ '^[CS].*'"; + givenQueryParameter(QueryParameters.GROUP_FIELDS, "AGE,$GENDER"); + givenQueryParameter(QueryParameters.GROUP_FIELDS_BATCH_SIZE, "6"); - // @formatter:off - Map expectedMap = ImmutableMap. builder() - .put("FEMALE-18", 2) - .put("MALE-30", 1) - .put("MALE-34", 1) - .put("MALE-16", 1) - .put("MALE-40", 2) - .put("MALE-20", 2) - .put("MALE-24", 1) - .put("MALE-22", 2) - .build(); - // @formatter:on + expectGroup(Group.of("FEMALE", "18").withCount(2)); + expectGroup(Group.of("MALE", "30").withCount(1)); + expectGroup(Group.of("MALE", "34").withCount(1)); + expectGroup(Group.of("MALE", "16").withCount(1)); + expectGroup(Group.of("MALE", "40").withCount(2)); + expectGroup(Group.of("MALE", "20").withCount(2)); + expectGroup(Group.of("MALE", "24").withCount(1)); + expectGroup(Group.of("MALE", "22").withCount(2)); - extraParameters.put("group.fields", "AGE,$GENDER"); - extraParameters.put("group.fields.batch.size", "6"); + // Run the test queries and collect their results. + collectQueryResults(); - List> responseEvents = new ArrayList<>(); - for (RebuildingScannerTestHelper.TEARDOWN teardown : TEARDOWNS) { - for (RebuildingScannerTestHelper.INTERRUPT interrupt : INTERRUPTS) { - responseEvents.add(((DefaultEventQueryResponse) runTestQueryWithGrouping(expectedMap, queryString, startDate, endDate, extraParameters, - teardown, interrupt)).getEvents()); - } - } - List digested = digest(responseEvents); - log.debug("reponses:" + digested); - Set responseSet = Sets.newHashSet(digested); - // if the grouped results from every type of rebuild are the same, there should be only 1 entry in the responseSet - Assert.assertEquals(responseSet.size(), 1); - } - - // grab the relevant stuff from the events and do some formatting - private List digest(List> in) { - List stringList = new ArrayList<>(); - for (List list : in) { - StringBuilder builder = new StringBuilder(); - for (EventBase eb : list) { - for (Object field : eb.getFields()) { - FieldBase fieldBase = (FieldBase) field; - builder.append(fieldBase.getName()); - builder.append(':'); - builder.append(fieldBase.getTypedValue().getValue()); - builder.append(','); - } - } - stringList.add(builder.toString() + '\n'); - } - return stringList; + // Verify the results. 
+ assertGroups(); + assertResponseEventsAreIdenticalForAllTestResults(); } + /** + * Verify grouping by age with a batch size of 6 works correctly. + */ @Test - public void testGrouping2() throws Exception { - Map extraParameters = new HashMap<>(); + public void testGroupByAgeWithBatchSizeOfSix() throws Exception { + givenNonModelData(); - Date startDate = format.parse("20091231"); - Date endDate = format.parse("20150101"); + givenQuery("UUID =~ '^[CS].*'"); - String queryString = "UUID =~ '^[CS].*'"; + givenQueryParameter(QueryParameters.GROUP_FIELDS, "AGE"); + givenQueryParameter(QueryParameters.GROUP_FIELDS_BATCH_SIZE, "6"); - // @formatter:off - Map expectedMap = ImmutableMap. builder() - .put("18", 2) - .put("30", 1) - .put("34", 1) - .put("16", 1) - .put("40", 2) - .put("20", 2) - .put("24", 1) - .put("22", 2) - .build(); - // @formatter:on - extraParameters.put("group.fields", "AGE"); - extraParameters.put("group.fields.batch.size", "6"); + expectGroup(Group.of("18").withCount(2)); + expectGroup(Group.of("30").withCount(1)); + expectGroup(Group.of("34").withCount(1)); + expectGroup(Group.of("16").withCount(1)); + expectGroup(Group.of("40").withCount(2)); + expectGroup(Group.of("20").withCount(2)); + expectGroup(Group.of("24").withCount(1)); + expectGroup(Group.of("22").withCount(2)); - for (RebuildingScannerTestHelper.TEARDOWN teardown : TEARDOWNS) { - for (RebuildingScannerTestHelper.INTERRUPT interrupt : INTERRUPTS) { - runTestQueryWithGrouping(expectedMap, queryString, startDate, endDate, extraParameters, teardown, interrupt); - } - } + collectQueryResults(); + + assertGroups(); } + /** + * Verify that grouping by gender with a batch size of 0 works correctly. + */ @Test - public void testGrouping3() throws Exception { - Map extraParameters = new HashMap<>(); + public void testGroupByGenderWithBatchSizeOfZero() throws Exception { + givenNonModelData(); - Date startDate = format.parse("20091231"); - Date endDate = format.parse("20150101"); + givenQuery("UUID =~ '^[CS].*'"); - String queryString = "UUID =~ '^[CS].*'"; + givenQueryParameter(QueryParameters.GROUP_FIELDS, "GENDER"); + givenQueryParameter(QueryParameters.GROUP_FIELDS_BATCH_SIZE, "0"); - Map expectedMap = ImmutableMap.of("MALE", 10, "FEMALE", 2); + expectGroup(Group.of("MALE").withCount(10)); + expectGroup(Group.of("FEMALE").withCount(2)); - extraParameters.put("group.fields", "GENDER"); - extraParameters.put("group.fields.batch.size", "0"); + // Run the test queries and collect their results. + collectQueryResults(); - for (RebuildingScannerTestHelper.TEARDOWN teardown : TEARDOWNS) { - for (RebuildingScannerTestHelper.INTERRUPT interrupt : INTERRUPTS) { - runTestQueryWithGrouping(expectedMap, queryString, startDate, endDate, extraParameters, teardown, interrupt); - } - } + // Verify the results. + assertGroups(); } + /** + * Verify grouping by gender with a batch size of 6 works correctly. 
+ */ @Test - public void testGroupingWithReducedResponse() throws Exception { - Map extraParameters = new HashMap<>(); + public void testGroupByGenderWithBatchSizeOfSix() throws Exception { + givenNonModelData(); - Date startDate = format.parse("20091231"); - Date endDate = format.parse("20150101"); + givenQuery("UUID =~ '^[CS].*'"); - String queryString = "UUID =~ '^[CS].*'"; + givenQueryParameter(QueryParameters.GROUP_FIELDS, "GENDER"); + givenQueryParameter(QueryParameters.GROUP_FIELDS_BATCH_SIZE, "6"); - Map expectedMap = ImmutableMap.of("MALE", 10, "FEMALE", 2); + expectGroup(Group.of("MALE").withCount(10)); + expectGroup(Group.of("FEMALE").withCount(2)); - extraParameters.put("reduced.response", "true"); - extraParameters.put("group.fields", "GENDER"); - extraParameters.put("group.fields.batch.size", "0"); + // Run the test queries and collect their results. + collectQueryResults(); - for (RebuildingScannerTestHelper.TEARDOWN teardown : TEARDOWNS) { - for (RebuildingScannerTestHelper.INTERRUPT interrupt : INTERRUPTS) { - EventQueryResponseBase response = (EventQueryResponseBase) runTestQueryWithGrouping(expectedMap, queryString, startDate, endDate, - extraParameters, teardown, interrupt); - - for (EventBase event : response.getEvents()) { - // The event should have a collapsed columnVisibility - String actualCV = event.getMarkings().get(COLVIS_MARKING).toString(); - Assert.assertEquals(EXPECTED_COLVIS, actualCV); - - // The fields should have no columnVisibility - for (Object f : event.getFields()) { - FieldBase field = (FieldBase) f; - Assert.assertNull(field.getMarkings().get(COLVIS_MARKING)); - } + // Verify the results. + assertGroups(); + } + + /** + * Verify that reducing the response when grouping results in the correct combined visibility and markings. + */ + @Test + public void testGroupByWithReducedResponse() throws Exception { + givenNonModelData(); + + givenQuery("UUID =~ '^[CS].*'"); + + givenQueryParameter(QueryOptions.REDUCED_RESPONSE, "true"); + givenQueryParameter(QueryParameters.GROUP_FIELDS, "GENDER"); + givenQueryParameter(QueryParameters.GROUP_FIELDS_BATCH_SIZE, "0"); + + expectGroup(Group.of("MALE").withCount(10)); + expectGroup(Group.of("FEMALE").withCount(2)); + + // Run the test queries and collect their results. + collectQueryResults(); + + // Verify the results. + assertGroups(); + + // Verify that the column visibility was appropriately reduced + for (QueryResult result : queryResults) { + // noinspection rawtypes + for (EventBase event : result.response.getEvents()) { + String eventCV = event.getMarkings().get(COLVIS_MARKING).toString(); + assertThat(eventCV).describedAs("Assert event cv for teardown: %s, interrupt: %s", result.teardown, result.interrupt).isEqualTo(REDUCED_COLVIS); + // noinspection unchecked + for (FieldBase field : (List>) event.getFields()) { + String fieldCV = field.getMarkings().get(COLVIS_MARKING); + assertThat(fieldCV).describedAs("Assert null field cv for field: %s, teardown: %s, interrupt: %s", field.getName(), result.teardown, + result.interrupt).isNull(); } } } } + /** + * Verify that grouping by multivalued entries with no context works correctly. 
+ */ @Test - public void testGrouping4() throws Exception { - Map extraParameters = new HashMap<>(); + public void testGroupByRecord() throws Exception { + givenNonModelData(); - Date startDate = format.parse("20091231"); - Date endDate = format.parse("20150101"); + givenQuery("UUID =~ '^[CS].*'"); - String queryString = "UUID =~ '^[CS].*'"; + givenQueryParameter(QueryParameters.GROUP_FIELDS, "RECORD"); - Map expectedMap = ImmutableMap.of("MALE", 10, "FEMALE", 2); + expectGroup(Group.of("1").withCount(3)); + expectGroup(Group.of("2").withCount(3)); + expectGroup(Group.of("3").withCount(1)); - extraParameters.put("group.fields", "GENDER"); - extraParameters.put("group.fields.batch.size", "6"); + // Run the test queries and collect their results. + collectQueryResults(); - for (RebuildingScannerTestHelper.TEARDOWN teardown : TEARDOWNS) { - for (RebuildingScannerTestHelper.INTERRUPT interrupt : INTERRUPTS) { - runTestQueryWithGrouping(expectedMap, queryString, startDate, endDate, extraParameters, teardown, interrupt); - } - } + // Verify the results. + assertGroups(); } + /** + * Verify that grouping multivalued entries with no context in combination with entries that have grouping context works correctly. + */ @Test - public void testGroupingEntriesWithNoContext() throws Exception { - // Testing multivalued entries with no grouping context - Map extraParameters = new HashMap<>(); + public void testGroupByGenderAndRecord() throws Exception { + givenNonModelData(); - Date startDate = format.parse("20091231"); - Date endDate = format.parse("20150101"); + givenQuery("UUID =~ '^[CS].*'"); - String queryString = "UUID =~ '^[CS].*'"; + givenQueryParameter(QueryParameters.GROUP_FIELDS, "GENDER,RECORD"); - Map expectedMap = ImmutableMap.of("1", 3, "2", 3, "3", 1); + expectGroup(Group.of("FEMALE", "1").withCount(2)); + expectGroup(Group.of("FEMALE", "2").withCount(2)); + expectGroup(Group.of("MALE", "1").withCount(10)); + expectGroup(Group.of("MALE", "2").withCount(10)); + expectGroup(Group.of("MALE", "3").withCount(4)); - extraParameters.put("group.fields", "RECORD"); + // Run the test queries and collect their results. + collectQueryResults(); - for (RebuildingScannerTestHelper.TEARDOWN teardown : TEARDOWNS) { - for (RebuildingScannerTestHelper.INTERRUPT interrupt : INTERRUPTS) { - runTestQueryWithGrouping(expectedMap, queryString, startDate, endDate, extraParameters, teardown, interrupt); - } - } + // Verify the results. + assertGroups(); } + /** + * Verify that specifying group fields via a JEXL function works correctly. + */ @Test - public void testGroupingMixedEntriesWithAndWithNoContext() throws Exception { - // Testing multivalued entries with no grouping context in combination with a grouping context entries - Map extraParameters = new HashMap<>(); + public void testGroupByJexlFunction() throws Exception { + givenNonModelData(); - Date startDate = format.parse("20091231"); - Date endDate = format.parse("20150101"); + givenQuery("UUID =~ '^[CS].*' && f:groupby('$AGE','GENDER')"); - String queryString = "UUID =~ '^[CS].*'"; + givenQueryParameter(QueryParameters.GROUP_FIELDS_BATCH_SIZE, "6"); - // @formatter:off - Map expectedMap = ImmutableMap. 
builder() - .put("FEMALE-2", 1) - .put("MALE-1", 3) - .put("MALE-2", 2) - .put("MALE-3", 1) - .build(); - // @formatter:on + expectGroup(Group.of("FEMALE", "18").withCount(2)); + expectGroup(Group.of("MALE", "30").withCount(1)); + expectGroup(Group.of("MALE", "34").withCount(1)); + expectGroup(Group.of("MALE", "16").withCount(1)); + expectGroup(Group.of("MALE", "40").withCount(2)); + expectGroup(Group.of("MALE", "20").withCount(2)); + expectGroup(Group.of("MALE", "24").withCount(1)); + expectGroup(Group.of("MALE", "22").withCount(2)); - extraParameters.put("group.fields", "GENDER,RECORD"); - // extraParameters.put("group.fields.batch.size", "12"); + // Run the test queries and collect their results. + collectQueryResults(); - for (RebuildingScannerTestHelper.TEARDOWN teardown : TEARDOWNS) { - for (RebuildingScannerTestHelper.INTERRUPT interrupt : INTERRUPTS) { - runTestQueryWithGrouping(expectedMap, queryString, startDate, endDate, extraParameters, teardown, interrupt); - } - } + // Verify the results. + assertGroups(); } + /** + * Verify that specifying group fields via a LUCENE function works correctly. + */ @Test - public void testGroupingUsingFunction() throws Exception { - Map extraParameters = new HashMap<>(); - extraParameters.put("group.fields.batch.size", "6"); + public void testGroupByLuceneFunction() throws Exception { + givenNonModelData(); - Date startDate = format.parse("20091231"); - Date endDate = format.parse("20150101"); + givenQuery("(UUID:C* or UUID:S* ) and #GROUPBY('AGE','$GENDER')"); - String queryString = "UUID =~ '^[CS].*' && f:groupby('$AGE','GENDER')"; + givenQueryParameter(QueryParameters.GROUP_FIELDS_BATCH_SIZE, "6"); - // @formatter:off - Map expectedMap = ImmutableMap. builder() - .put("FEMALE-18", 2) - .put("MALE-30", 1) - .put("MALE-34", 1) - .put("MALE-16", 1) - .put("MALE-40", 2) - .put("MALE-20", 2) - .put("MALE-24", 1) - .put("MALE-22", 2) - .build(); - // @formatter:on + givenLuceneParserForLogic(); - for (RebuildingScannerTestHelper.TEARDOWN teardown : TEARDOWNS) { - for (RebuildingScannerTestHelper.INTERRUPT interrupt : INTERRUPTS) { - runTestQueryWithGrouping(expectedMap, queryString, startDate, endDate, extraParameters, teardown, interrupt); - } - } + expectGroup(Group.of("FEMALE", "18").withCount(2)); + expectGroup(Group.of("MALE", "30").withCount(1)); + expectGroup(Group.of("MALE", "34").withCount(1)); + expectGroup(Group.of("MALE", "16").withCount(1)); + expectGroup(Group.of("MALE", "40").withCount(2)); + expectGroup(Group.of("MALE", "20").withCount(2)); + expectGroup(Group.of("MALE", "24").withCount(1)); + expectGroup(Group.of("MALE", "22").withCount(2)); + + // Run the test queries and collect their results. + collectQueryResults(); + + // Verify the results. + assertGroups(); } + /** + * Verify that specifying group fields via a LUCENE function with two values works correctly. + */ @Test - public void testGroupingUsingLuceneFunction() throws Exception { - Map extraParameters = new HashMap<>(); - extraParameters.put("group.fields.batch.size", "6"); + public void testGroupByLuceneFunctionWithDuplicateValues() throws Exception { + givenNonModelData(); - Date startDate = format.parse("20091231"); - Date endDate = format.parse("20150101"); + givenQuery("(UUID:CORLEONE) and #GROUPBY('AGE','BIRTHDAY')"); - String queryString = "(UUID:C* or UUID:S* ) and #GROUPBY('AGE','$GENDER')"; + givenQueryParameter(QueryParameters.GROUP_FIELDS_BATCH_SIZE, "6"); - // @formatter:off - Map expectedMap = ImmutableMap. 
builder() - .put("FEMALE-18", 2) - .put("MALE-30", 1) - .put("MALE-34", 1) - .put("MALE-16", 1) - .put("MALE-40", 2) - .put("MALE-20", 2) - .put("MALE-24", 1) - .put("MALE-22", 2) - .build(); - // @formatter:on - logic.setParser(new LuceneToJexlQueryParser()); - for (RebuildingScannerTestHelper.TEARDOWN teardown : TEARDOWNS) { - for (RebuildingScannerTestHelper.INTERRUPT interrupt : INTERRUPTS) { - runTestQueryWithGrouping(expectedMap, queryString, startDate, endDate, extraParameters, teardown, interrupt); - } - } - logic.setParser(new JexlControlledQueryParser()); + givenLuceneParserForLogic(); + + expectGroup(Group.of("4", "18").withCount(1)); + expectGroup(Group.of("5", "40").withCount(1)); + expectGroup(Group.of("3", "20").withCount(1)); + expectGroup(Group.of("1", "24").withCount(1)); + expectGroup(Group.of("2", "22").withCount(1)); + expectGroup(Group.of("22", "22").withCount(1)); + + // Run the test queries and collect their results. + collectQueryResults(); + + // Verify the results. + assertGroups(); } @Test - public void testGroupingUsingLuceneFunctionWithDuplicateValues() throws Exception { - Map extraParameters = new HashMap<>(); - extraParameters.put("group.fields.batch.size", "6"); + public void testGroupingByGenderAndAllAgeMetrics() throws Exception { + givenNonModelData(); - Date startDate = format.parse("20091231"); - Date endDate = format.parse("20150101"); + givenQuery("UUID =~ '^[CS].*'"); - String queryString = "(UUID:CORLEONE) and #GROUPBY('AGE','BIRTHDAY')"; + givenQueryParameter(QueryParameters.GROUP_FIELDS, "GENDER"); + givenQueryParameter(QueryParameters.MAX_FIELDS, "AGE"); + givenQueryParameter(QueryParameters.MIN_FIELDS, "AGE"); + givenQueryParameter(QueryParameters.SUM_FIELDS, "AGE"); + givenQueryParameter(QueryParameters.AVERAGE_FIELDS, "AGE"); + givenQueryParameter(QueryParameters.COUNT_FIELDS, "AGE"); + givenQueryParameter(QueryParameters.GROUP_FIELDS_BATCH_SIZE, "6"); - // @formatter:off - Map expectedMap = ImmutableMap. builder() - .put("4-18", 1) - .put("5-40", 1) - .put("3-20", 1) - .put("1-24", 1) - .put("2-22", 1) - .put("22-22", 1) - .build(); - // @formatter:on - logic.setParser(new LuceneToJexlQueryParser()); - for (RebuildingScannerTestHelper.TEARDOWN teardown : TEARDOWNS) { - for (RebuildingScannerTestHelper.INTERRUPT interrupt : INTERRUPTS) { - runTestQueryWithGrouping(expectedMap, queryString, startDate, endDate, extraParameters, teardown, interrupt); - } - } - logic.setParser(new JexlControlledQueryParser()); + expectGroup(Group.of("MALE").withCount(10) + .withAggregate(Aggregate.of("AGE").withCount("10").withMax("40").withMin("16").withSum("268").withAverage("26.8"))); + expectGroup(Group.of("FEMALE").withCount(2) + .withAggregate(Aggregate.of("AGE").withCount("2").withMax("18").withMin("18").withSum("36").withAverage("18"))); + + // Run the test queries and collect their results. + collectQueryResults(); + + // Verify the results. 
+ assertGroups(); } @Test - public void testCountingMap() { - MarkingFunctions markingFunctions = new MarkingFunctions.Default(); - GroupCountingHashMap map = new GroupCountingHashMap(markingFunctions); - GroupingTypeAttribute attr1 = new GroupingTypeAttribute(new LcType("FOO"), new Key("FOO"), true); - attr1.setColumnVisibility(new ColumnVisibility("A")); - map.add(Collections.singleton(attr1)); - - GroupingTypeAttribute attr2 = new GroupingTypeAttribute(new LcType("FOO"), new Key("FOO"), true); - attr2.setColumnVisibility(new ColumnVisibility("B")); - map.add(Collections.singleton(attr2)); - GroupingTypeAttribute attr3 = new GroupingTypeAttribute(new LcType("BAR"), new Key("BAR"), true); - attr3.setColumnVisibility(new ColumnVisibility("C")); - map.add(Collections.singleton(attr3)); - - log.debug("map is: " + map); - - for (Map.Entry>,Integer> entry : map.entrySet()) { - Attribute attr = entry.getKey().iterator().next(); // the first and only one - int count = entry.getValue(); - if (attr.getData().toString().equals("FOO")) { - Assert.assertEquals(2, count); - Assert.assertEquals(new ColumnVisibility("A&B"), attr.getColumnVisibility()); - } else if (attr.getData().toString().equals("BAR")) { - Assert.assertEquals(1, count); - Assert.assertEquals(new ColumnVisibility("C"), attr.getColumnVisibility()); - } - } + public void testGroupingByGenderAndAllAgeMetricsUsingJexlFunction() throws Exception { + givenNonModelData(); + + givenQuery("UUID =~ '^[CS].*' && f:groupby('$GENDER') && f:sum('AGE') && f:min('AGE') && f:max('AGE') && f:average('AGE') && f:count('AGE')"); + + expectGroup(Group.of("MALE").withCount(10) + .withAggregate(Aggregate.of("AGE").withCount("10").withMax("40").withMin("16").withSum("268").withAverage("26.8"))); + expectGroup(Group.of("FEMALE").withCount(2) + .withAggregate(Aggregate.of("AGE").withCount("2").withMax("18").withMin("18").withSum("36").withAverage("18"))); + + // Run the test queries and collect their results. + collectQueryResults(); + + // Verify the results. 
+ assertGroups(); } @Test - public void testCountingMapAgain() { - MarkingFunctions markingFunctions = new MarkingFunctions.Default(); - GroupCountingHashMap map = new GroupCountingHashMap(markingFunctions); - - GroupingTypeAttribute attr1a = new GroupingTypeAttribute(new LcType("FOO"), new Key("NAME"), true); - attr1a.setColumnVisibility(new ColumnVisibility("A")); - GroupingTypeAttribute attr1b = new GroupingTypeAttribute(new NumberType("5"), new Key("AGE"), true); - attr1b.setColumnVisibility(new ColumnVisibility("C")); - Set> seta = Sets.newHashSet(attr1a, attr1b); - map.add(seta); - - GroupingTypeAttribute attr2a = new GroupingTypeAttribute(new LcType("FOO"), new Key("NAME"), true); - attr2a.setColumnVisibility(new ColumnVisibility("B")); - GroupingTypeAttribute attr2b = new GroupingTypeAttribute(new NumberType("5"), new Key("AGE"), true); - attr2b.setColumnVisibility(new ColumnVisibility("D")); - Set> setb = Sets.newHashSet(attr2a, attr2b); - map.add(setb); - - // even though the ColumnVisibilities are different, the 2 collections seta and setb are 'equal' and generate the same hashCode - Assert.assertEquals(seta.hashCode(), setb.hashCode()); - Assert.assertEquals(seta, setb); - - GroupingTypeAttribute attr3a = new GroupingTypeAttribute(new LcType("BAR"), new Key("NAME"), true); - attr3a.setColumnVisibility(new ColumnVisibility("C")); - GroupingTypeAttribute attr3b = new GroupingTypeAttribute(new NumberType("6"), new Key("AGE"), true); - attr3b.setColumnVisibility(new ColumnVisibility("D")); - map.add(Sets.newHashSet(attr3a, attr3b)); - - log.debug("map is: " + map); - - for (Map.Entry>,Integer> entry : map.entrySet()) { - for (Attribute attr : entry.getKey()) { - int count = entry.getValue(); - if (attr.getData().toString().equals("FOO")) { - Assert.assertEquals(2, count); - // the ColumnVisibility for the key was changed to the merged value of the 2 items that were added to the map - Assert.assertEquals(new ColumnVisibility("A&B"), attr.getColumnVisibility()); - } else if (attr.getData().toString().equals("5")) { - Assert.assertEquals(2, count); - // the ColumnVisibility for the key was changed to the merged value of the 2 items that were added to the map - Assert.assertEquals(new ColumnVisibility("C&D"), attr.getColumnVisibility()); - } else if (attr.getData().toString().equals("BAR")) { - Assert.assertEquals(1, count); - Assert.assertEquals(new ColumnVisibility("C"), attr.getColumnVisibility()); - } else if (attr.getData().toString().equals("6")) { - Assert.assertEquals(1, count); - Assert.assertEquals(new ColumnVisibility("D"), attr.getColumnVisibility()); - } - } - } + public void testGroupingByGenderAndAllAgeMetricsUsingLuceneFunction() throws Exception { + givenNonModelData(); + + givenQuery("(UUID:C* or UUID:S* ) and #GROUPBY('$GENDER') and #SUM('AGE') and #MAX('AGE') and #MIN('AGE') and #AVERAGE('AGE') and #COUNT('AGE')"); + givenLuceneParserForLogic(); + + expectGroup(Group.of("MALE").withCount(10) + .withAggregate(Aggregate.of("AGE").withCount("10").withMax("40").withMin("16").withSum("268").withAverage("26.8"))); + expectGroup(Group.of("FEMALE").withCount(2) + .withAggregate(Aggregate.of("AGE").withCount("2").withMax("18").withMin("18").withSum("36").withAverage("18"))); + + // Run the test queries and collect their results. + collectQueryResults(); + + // Verify the results. 
+ assertGroups(); } - private static RemoteEdgeDictionary mockRemoteEdgeDictionary = EasyMock.createMock(RemoteEdgeDictionary.class); + @Test + public void testGroupByAgeAndGenderWithBatchSizeOfSixUsingModel() throws Exception { + givenModelData(); - public static class Producer { - @Produces - public static RemoteEdgeDictionary produceRemoteEdgeDictionary() { - return mockRemoteEdgeDictionary; - } + givenQuery("UUID =~ '^[CS].*'"); + + givenQueryParameter(QueryParameters.GROUP_FIELDS, "AG,GEN"); + givenQueryParameter(QueryParameters.GROUP_FIELDS_BATCH_SIZE, "6"); + + expectGroup(Group.of("FEMALE", "18").withCount(2)); + expectGroup(Group.of("MALE", "30").withCount(1)); + expectGroup(Group.of("MALE", "34").withCount(1)); + expectGroup(Group.of("MALE", "16").withCount(1)); + expectGroup(Group.of("MALE", "40").withCount(2)); + expectGroup(Group.of("MALE", "20").withCount(2)); + expectGroup(Group.of("MALE", "24").withCount(1)); + expectGroup(Group.of("MALE", "22").withCount(2)); + + // Run the test queries and collect their results. + collectQueryResults(); + + // Verify the results. + assertGroups(); + assertResponseEventsAreIdenticalForAllTestResults(); } + @Test + public void testGroupByAgeWithBatchSizeOfSixUsingModel() throws Exception { + // Set up the test. + givenModelData(); + + givenQuery("UUID =~ '^[CS].*'"); + + givenQueryParameter(QueryParameters.GROUP_FIELDS, "AG"); + givenQueryParameter(QueryParameters.GROUP_FIELDS_BATCH_SIZE, "6"); + + expectGroup(Group.of("18").withCount(2)); + expectGroup(Group.of("30").withCount(1)); + expectGroup(Group.of("34").withCount(1)); + expectGroup(Group.of("16").withCount(1)); + expectGroup(Group.of("40").withCount(2)); + expectGroup(Group.of("20").withCount(2)); + expectGroup(Group.of("24").withCount(1)); + expectGroup(Group.of("22").withCount(2)); + + // Run the test queries and collect their results. + collectQueryResults(); + + // Verify the results. 
+ assertGroups(); + } + + @Test + public void testGroupByGenderWithBatchSizeOfSixUsingModel() throws Exception { + givenModelData(); + + givenQuery("UUID =~ '^[CS].*'"); + + givenQueryParameter(QueryParameters.GROUP_FIELDS, "GEN"); + givenQueryParameter(QueryParameters.GROUP_FIELDS_BATCH_SIZE, "6"); + + expectGroup(Group.of("MALE").withCount(10)); + expectGroup(Group.of("FEMALE").withCount(2)); + + collectQueryResults(); + + assertGroups(); + } + + @Test + public void testGroupByGenderWithBatchSizeOfZeroUsingModel() throws Exception { + givenModelData(); + + givenQuery("UUID =~ '^[CS].*'"); + + givenQueryParameter(QueryParameters.GROUP_FIELDS, "GEN"); + givenQueryParameter(QueryParameters.GROUP_FIELDS_BATCH_SIZE, "0"); + + expectGroup(Group.of("MALE").withCount(10)); + expectGroup(Group.of("FEMALE").withCount(2)); + + collectQueryResults(); + + assertGroups(); + } + + @Test + public void testGroupByJexlFunctionsUsingModel() throws Exception { + givenModelData(); + + givenQuery("UUID =~ '^[CS].*' && f:groupby('AG','GEN')"); + + givenQueryParameter(QueryParameters.GROUP_FIELDS_BATCH_SIZE, "6"); + + expectGroup(Group.of("FEMALE", "18").withCount(2)); + expectGroup(Group.of("MALE", "30").withCount(1)); + expectGroup(Group.of("MALE", "34").withCount(1)); + expectGroup(Group.of("MALE", "16").withCount(1)); + expectGroup(Group.of("MALE", "40").withCount(2)); + expectGroup(Group.of("MALE", "20").withCount(2)); + expectGroup(Group.of("MALE", "24").withCount(1)); + expectGroup(Group.of("MALE", "22").withCount(2)); + + collectQueryResults(); + + assertGroups(); + } + + @Test + public void testGroupByLuceneFunctionUsingModel() throws Exception { + givenModelData(); + + givenQuery("(UUID:C* or UUID:S* ) and #GROUPBY('AG','GEN')"); + + givenQueryParameter(QueryParameters.GROUP_FIELDS_BATCH_SIZE, "6"); + + givenLuceneParserForLogic(); + + expectGroup(Group.of("FEMALE", "18").withCount(2)); + expectGroup(Group.of("MALE", "30").withCount(1)); + expectGroup(Group.of("MALE", "34").withCount(1)); + expectGroup(Group.of("MALE", "16").withCount(1)); + expectGroup(Group.of("MALE", "40").withCount(2)); + expectGroup(Group.of("MALE", "20").withCount(2)); + expectGroup(Group.of("MALE", "24").withCount(1)); + expectGroup(Group.of("MALE", "22").withCount(2)); + + collectQueryResults(); + + assertGroups(); + } + + @Test + public void testGroupingByGenderAndAllAgeMetricsUsingModel() throws Exception { + givenModelData(); + + givenQuery("UUID =~ '^[CS].*'"); + + givenQueryParameter(QueryParameters.GROUP_FIELDS, "GEN"); + givenQueryParameter(QueryParameters.MAX_FIELDS, "AG"); + givenQueryParameter(QueryParameters.MIN_FIELDS, "AG"); + givenQueryParameter(QueryParameters.SUM_FIELDS, "AG"); + givenQueryParameter(QueryParameters.AVERAGE_FIELDS, "AG"); + givenQueryParameter(QueryParameters.COUNT_FIELDS, "AG"); + givenQueryParameter(QueryParameters.GROUP_FIELDS_BATCH_SIZE, "6"); + + expectGroup(Group.of("MALE").withCount(10) + .withAggregate(Aggregate.of("AG").withCount("10").withMax("40").withMin("16").withSum("268").withAverage("26.8"))); + expectGroup(Group.of("FEMALE").withCount(2) + .withAggregate(Aggregate.of("AG").withCount("2").withMax("18").withMin("18").withSum("36").withAverage("18"))); + + // Run the test queries and collect their results. + collectQueryResults(); + + // Verify the results. + assertGroups(); + } + + /** + * Verify that aggregating values when grouping by multivalued entries with no context works correctly. 
+ */ + @Test + public void testGroupByRecordWithAggregation() throws Exception { + givenNonModelData(); + + givenQuery("UUID =~ '^[CS].*' && f:sum('AGE') && f:min('GENDER') && f:max('GENDER') && f:average('BIRTHDAY') && f:count('GENDER', 'AGE', 'BIRTHDAY')"); + + givenQueryParameter(QueryParameters.GROUP_FIELDS, "RECORD"); + + // @formatter:off + expectGroup(Group.of("1").withCount(3) + .withAggregate(Aggregate.of("AGE").withSum("304").withCount("12")) + .withAggregate(Aggregate.of("BIRTHDAY").withAverage("6.166666667").withCount("6")) + .withAggregate(Aggregate.of("GENDER").withMin("FEMALE").withMax("MALE").withCount("12"))); + expectGroup(Group.of("2").withCount(3) + .withAggregate(Aggregate.of("AGE").withSum("304").withCount("12")) + .withAggregate(Aggregate.of("BIRTHDAY").withAverage("6.166666667").withCount("6")) + .withAggregate(Aggregate.of("GENDER").withMin("FEMALE").withMax("MALE").withCount("12"))); + expectGroup(Group.of("3").withCount(1) + .withAggregate(Aggregate.of("AGE").withSum("124").withCount("4")) + .withAggregate(Aggregate.of("BIRTHDAY").withCount("0")) + .withAggregate(Aggregate.of("GENDER").withMin("MALE").withMax("MALE").withCount("4"))); + // @formatter:on + // Run the test queries and collect their results. + collectQueryResults(); + + // Verify the results. + assertGroups(); + } + + /** + * Verify that attempting to sum a non-numerical value results in an exception. + */ + @Test + public void testSummingNonNumericalValue() { + givenNonModelData(); + + givenQuery("UUID =~ '^[CS].*' && f:sum('GENDER')"); + + givenQueryParameter(QueryParameters.GROUP_FIELDS, "RECORD"); + + Assertions.assertThatIllegalArgumentException().isThrownBy(this::collectQueryResults) + .withMessage("Unable to calculate a sum with non-numerical value 'MALE'"); + } + + /** + * Verify that attempting to average a non-numerical value results in an exception. 
+ */ + @Test + public void testAveragingNonNumericalValue() { + givenNonModelData(); + + givenQuery("UUID =~ '^[CS].*' && f:average('GENDER')"); + + givenQueryParameter(QueryParameters.GROUP_FIELDS, "RECORD"); + + Assertions.assertThatIllegalArgumentException().isThrownBy(this::collectQueryResults) + .withMessage("Unable to calculate an average with non-numerical value 'MALE'"); + } } diff --git a/warehouse/query-core/src/test/java/datawave/query/transformer/GroupingTestWithModel.java b/warehouse/query-core/src/test/java/datawave/query/transformer/GroupingTestWithModel.java deleted file mode 100644 index 9267a183bf..0000000000 --- a/warehouse/query-core/src/test/java/datawave/query/transformer/GroupingTestWithModel.java +++ /dev/null @@ -1,534 +0,0 @@ -package datawave.query.transformer; - -import static datawave.query.RebuildingScannerTestHelper.TEARDOWN.ALWAYS; -import static datawave.query.RebuildingScannerTestHelper.TEARDOWN.ALWAYS_SANS_CONSISTENCY; -import static datawave.query.RebuildingScannerTestHelper.TEARDOWN.EVERY_OTHER; -import static datawave.query.RebuildingScannerTestHelper.TEARDOWN.EVERY_OTHER_SANS_CONSISTENCY; -import static datawave.query.RebuildingScannerTestHelper.TEARDOWN.NEVER; -import static datawave.query.RebuildingScannerTestHelper.TEARDOWN.RANDOM; -import static datawave.query.RebuildingScannerTestHelper.TEARDOWN.RANDOM_SANS_CONSISTENCY; - -import java.text.DateFormat; -import java.text.SimpleDateFormat; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.Date; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.TimeZone; -import java.util.UUID; - -import javax.inject.Inject; - -import org.apache.accumulo.core.client.AccumuloClient; -import org.apache.accumulo.core.data.Key; -import org.apache.accumulo.core.security.Authorizations; -import org.apache.accumulo.core.security.ColumnVisibility; -import org.apache.commons.collections4.iterators.TransformIterator; -import org.apache.commons.jexl2.parser.ASTJexlScript; -import org.apache.log4j.Logger; -import org.jboss.arquillian.container.test.api.Deployment; -import org.jboss.arquillian.junit.Arquillian; -import org.jboss.shrinkwrap.api.ShrinkWrap; -import org.jboss.shrinkwrap.api.asset.StringAsset; -import org.jboss.shrinkwrap.api.spec.JavaArchive; -import org.junit.AfterClass; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; -import org.junit.runner.RunWith; - -import com.fasterxml.jackson.databind.MapperFeature; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.Lists; -import com.google.common.collect.Sets; - -import datawave.configuration.spring.SpringBean; -import datawave.data.type.LcType; -import datawave.data.type.NumberType; -import datawave.helpers.PrintUtility; -import datawave.ingest.data.TypeRegistry; -import datawave.marking.MarkingFunctions; -import datawave.query.QueryTestTableHelper; -import datawave.query.RebuildingScannerTestHelper; -import datawave.query.attributes.Attribute; -import datawave.query.common.grouping.GroupingUtil.GroupCountingHashMap; -import datawave.query.common.grouping.GroupingUtil.GroupingTypeAttribute; -import datawave.query.function.deserializer.KryoDocumentDeserializer; -import datawave.query.language.parser.jexl.JexlControlledQueryParser; -import 
datawave.query.language.parser.jexl.LuceneToJexlQueryParser; -import datawave.query.tables.ShardQueryLogic; -import datawave.query.tables.edge.DefaultEdgeEventQueryLogic; -import datawave.query.util.VisibilityWiseGuysIngestWithModel; -import datawave.util.TableName; -import datawave.webservice.edgedictionary.RemoteEdgeDictionary; -import datawave.webservice.query.QueryImpl; -import datawave.webservice.query.configuration.GenericQueryConfiguration; -import datawave.webservice.query.iterator.DatawaveTransformIterator; -import datawave.webservice.query.result.event.EventBase; -import datawave.webservice.query.result.event.FieldBase; -import datawave.webservice.result.BaseQueryResponse; -import datawave.webservice.result.DefaultEventQueryResponse; - -/** - * Applies grouping to queries - * - */ -public abstract class GroupingTestWithModel { - - private static final Logger log = Logger.getLogger(GroupingTestWithModel.class); - - private static Authorizations auths = new Authorizations("ALL", "E", "I"); - - // @formatter:off - private static final List TEARDOWNS = Lists.newArrayList( - NEVER, - ALWAYS, - ALWAYS_SANS_CONSISTENCY, - RANDOM, - RANDOM_SANS_CONSISTENCY, - EVERY_OTHER, - EVERY_OTHER_SANS_CONSISTENCY - ); - private static final List INTERRUPTS = Arrays.asList(RebuildingScannerTestHelper.INTERRUPT.values()); - // @formatter:on - - @RunWith(Arquillian.class) - public static class ShardRange extends GroupingTestWithModel { - - @Override - protected BaseQueryResponse runTestQueryWithGrouping(Map expected, String querystr, Date startDate, Date endDate, - Map extraParms, RebuildingScannerTestHelper.TEARDOWN teardown, RebuildingScannerTestHelper.INTERRUPT interrupt) - throws Exception { - QueryTestTableHelper qtth = new QueryTestTableHelper(ShardRange.class.getName(), log, teardown, interrupt); - AccumuloClient client = qtth.client; - VisibilityWiseGuysIngestWithModel.writeItAll(client, VisibilityWiseGuysIngestWithModel.WhatKindaRange.SHARD); - PrintUtility.printTable(client, auths, TableName.SHARD); - PrintUtility.printTable(client, auths, TableName.SHARD_INDEX); - PrintUtility.printTable(client, auths, QueryTestTableHelper.MODEL_TABLE_NAME); - return super.runTestQueryWithGrouping(expected, querystr, startDate, endDate, extraParms, client); - } - } - - @RunWith(Arquillian.class) - public static class DocumentRange extends GroupingTestWithModel { - - @Override - protected BaseQueryResponse runTestQueryWithGrouping(Map expected, String querystr, Date startDate, Date endDate, - Map extraParms, RebuildingScannerTestHelper.TEARDOWN teardown, RebuildingScannerTestHelper.INTERRUPT interrupt) - throws Exception { - QueryTestTableHelper qtth = new QueryTestTableHelper(DocumentRange.class.toString(), log, teardown, interrupt); - AccumuloClient client = qtth.client; - VisibilityWiseGuysIngestWithModel.writeItAll(client, VisibilityWiseGuysIngestWithModel.WhatKindaRange.DOCUMENT); - PrintUtility.printTable(client, auths, TableName.SHARD); - PrintUtility.printTable(client, auths, TableName.SHARD_INDEX); - PrintUtility.printTable(client, auths, QueryTestTableHelper.MODEL_TABLE_NAME); - return super.runTestQueryWithGrouping(expected, querystr, startDate, endDate, extraParms, client); - } - } - - @Rule - public TemporaryFolder temporaryFolder = new TemporaryFolder(); - - protected Set authSet = Collections.singleton(auths); - - @Inject - @SpringBean(name = "EventQuery") - protected ShardQueryLogic logic; - - protected KryoDocumentDeserializer deserializer; - - private final DateFormat format = new 
SimpleDateFormat("yyyyMMdd"); - - @Deployment - public static JavaArchive createDeployment() { - - return ShrinkWrap.create(JavaArchive.class) - .addPackages(true, "org.apache.deltaspike", "io.astefanutti.metrics.cdi", "datawave.query", "org.jboss.logging", - "datawave.webservice.query.result.event") - .deleteClass(DefaultEdgeEventQueryLogic.class).deleteClass(RemoteEdgeDictionary.class) - .deleteClass(datawave.query.metrics.QueryMetricQueryLogic.class).deleteClass(datawave.query.metrics.ShardTableQueryMetricHandler.class) - .addAsManifestResource(new StringAsset( - "" + "datawave.query.tables.edge.MockAlternative" + ""), - "beans.xml"); - } - - @AfterClass - public static void teardown() { - TypeRegistry.reset(); - } - - @Before - public void setup() { - TimeZone.setDefault(TimeZone.getTimeZone("GMT")); - - logic.setFullTableScanEnabled(true); - logic.setMaxEvaluationPipelines(1); - logic.setQueryExecutionForPageTimeout(300000000000000L); - deserializer = new KryoDocumentDeserializer(); - } - - protected abstract BaseQueryResponse runTestQueryWithGrouping(Map expected, String querystr, Date startDate, Date endDate, - Map extraParms, RebuildingScannerTestHelper.TEARDOWN teardown, RebuildingScannerTestHelper.INTERRUPT interrupt) - throws Exception; - - protected BaseQueryResponse runTestQueryWithGrouping(Map expected, String querystr, Date startDate, Date endDate, - Map extraParms, AccumuloClient client) throws Exception { - log.debug("runTestQueryWithGrouping"); - - QueryImpl settings = new QueryImpl(); - settings.setBeginDate(startDate); - settings.setEndDate(endDate); - settings.setPagesize(Integer.MAX_VALUE); - settings.setQueryAuthorizations(auths.serialize()); - settings.setQuery(querystr); - settings.setParameters(extraParms); - settings.setId(UUID.randomUUID()); - - log.debug("query: " + settings.getQuery()); - log.debug("logic: " + settings.getQueryLogicName()); - - GenericQueryConfiguration config = logic.initialize(client, settings, authSet); - logic.setupQuery(config); - - DocumentTransformer transformer = (DocumentTransformer) (logic.getTransformer(settings)); - TransformIterator iter = new DatawaveTransformIterator(logic.iterator(), transformer); - List eventList = new ArrayList<>(); - while (iter.hasNext()) { - eventList.add(iter.next()); - } - - BaseQueryResponse response = transformer.createResponse(eventList); - - // un-comment to look at the json output - ObjectMapper mapper = new ObjectMapper(); - mapper.enable(MapperFeature.USE_WRAPPER_NAME_AS_PROPERTY_NAME); - mapper.writeValue(temporaryFolder.newFile(), response); - - Assert.assertTrue(response instanceof DefaultEventQueryResponse); - DefaultEventQueryResponse eventQueryResponse = (DefaultEventQueryResponse) response; - - Assert.assertEquals("Got the wrong number of events", expected.size(), (long) eventQueryResponse.getReturnedEvents()); - - for (EventBase event : eventQueryResponse.getEvents()) { - - String genderKey = ""; - String ageKey = ""; - Integer value = null; - for (Object field : event.getFields()) { - FieldBase fieldBase = (FieldBase) field; - switch (fieldBase.getName()) { - case "COUNT": - value = Integer.valueOf(fieldBase.getValueString()); - break; - case "GENDER": - genderKey = fieldBase.getValueString(); - break; - case "GEN": - genderKey = fieldBase.getValueString(); - break; - case "AGE": - ageKey = fieldBase.getValueString(); - break; - case "AG": - ageKey = fieldBase.getValueString(); - break; - } - } - - log.debug("mapping is " + genderKey + "-" + ageKey + " count:" + value); - String key; - if 
(!genderKey.isEmpty() && !ageKey.isEmpty()) { - key = genderKey + "-" + ageKey; - } else if (!genderKey.isEmpty()) { - key = genderKey; - } else { - key = ageKey; - } - Assert.assertEquals(expected.get(key), value); - } - return response; - } - - @Test - public void testGrouping() throws Exception { - Map extraParameters = new HashMap<>(); - - Date startDate = format.parse("20091231"); - Date endDate = format.parse("20150101"); - - String queryString = "UUID =~ '^[CS].*'"; - - // @formatter:off - Map expectedMap = ImmutableMap. builder() - .put("FEMALE-18", 2) - .put("MALE-30", 1) - .put("MALE-34", 1) - .put("MALE-16", 1) - .put("MALE-40", 2) - .put("MALE-20", 2) - .put("MALE-24", 1) - .put("MALE-22", 2) - .build(); - // @formatter:on - - extraParameters.put("group.fields", "AG,GEN"); - extraParameters.put("group.fields.batch.size", "6"); - - List> responseEvents = new ArrayList<>(); - for (RebuildingScannerTestHelper.TEARDOWN teardown : TEARDOWNS) { - for (RebuildingScannerTestHelper.INTERRUPT interrupt : INTERRUPTS) { - responseEvents.add(((DefaultEventQueryResponse) runTestQueryWithGrouping(expectedMap, queryString, startDate, endDate, extraParameters, - teardown, interrupt)).getEvents()); - } - } - List digested = digest(responseEvents); - log.debug("reponses:" + digested); - Set responseSet = Sets.newHashSet(digested); - // if the grouped results from every type of rebuild are the same, there should be only 1 entry in the responseSet - Assert.assertEquals(responseSet.size(), 1); - } - - // grab the relevant stuff from the events and do some formatting - private List digest(List> in) { - List stringList = new ArrayList<>(); - for (List list : in) { - StringBuilder builder = new StringBuilder(); - for (EventBase eb : list) { - for (Object field : eb.getFields()) { - FieldBase fieldBase = (FieldBase) field; - builder.append(fieldBase.getName()); - builder.append(':'); - builder.append(fieldBase.getTypedValue().getValue()); - builder.append(','); - } - } - stringList.add(builder.toString() + '\n'); - } - return stringList; - } - - @Test - public void testGrouping2() throws Exception { - Map extraParameters = new HashMap<>(); - - Date startDate = format.parse("20091231"); - Date endDate = format.parse("20150101"); - - String queryString = "UUID =~ '^[CS].*'"; - - // @formatter:off - Map expectedMap = ImmutableMap. 
builder() - .put("18", 2) - .put("30", 1) - .put("34", 1) - .put("16", 1) - .put("40", 2) - .put("20", 2) - .put("24", 1) - .put("22", 2) - .build(); - // @formatter:on - extraParameters.put("group.fields", "AG"); - extraParameters.put("group.fields.batch.size", "6"); - - for (RebuildingScannerTestHelper.TEARDOWN teardown : TEARDOWNS) { - for (RebuildingScannerTestHelper.INTERRUPT interrupt : INTERRUPTS) { - runTestQueryWithGrouping(expectedMap, queryString, startDate, endDate, extraParameters, teardown, interrupt); - } - } - } - - @Test - public void testGrouping3() throws Exception { - Map extraParameters = new HashMap<>(); - - Date startDate = format.parse("20091231"); - Date endDate = format.parse("20150101"); - - String queryString = "UUID =~ '^[CS].*'"; - - Map expectedMap = ImmutableMap.of("MALE", 10, "FEMALE", 2); - - extraParameters.put("group.fields", "GEN"); - extraParameters.put("group.fields.batch.size", "6"); - - for (RebuildingScannerTestHelper.TEARDOWN teardown : TEARDOWNS) { - for (RebuildingScannerTestHelper.INTERRUPT interrupt : INTERRUPTS) { - runTestQueryWithGrouping(expectedMap, queryString, startDate, endDate, extraParameters, teardown, interrupt); - } - } - } - - @Test - public void testGrouping4() throws Exception { - Map extraParameters = new HashMap<>(); - - Date startDate = format.parse("20091231"); - Date endDate = format.parse("20150101"); - - String queryString = "UUID =~ '^[CS].*'"; - - Map expectedMap = ImmutableMap.of("MALE", 10, "FEMALE", 2); - - extraParameters.put("group.fields", "GEN"); - extraParameters.put("group.fields.batch.size", "0"); - - for (RebuildingScannerTestHelper.TEARDOWN teardown : TEARDOWNS) { - for (RebuildingScannerTestHelper.INTERRUPT interrupt : INTERRUPTS) { - runTestQueryWithGrouping(expectedMap, queryString, startDate, endDate, extraParameters, teardown, interrupt); - } - } - } - - @Test - public void testGroupingUsingFunction() throws Exception { - Map extraParameters = new HashMap<>(); - extraParameters.put("group.fields.batch.size", "6"); - - Date startDate = format.parse("20091231"); - Date endDate = format.parse("20150101"); - - String queryString = "UUID =~ '^[CS].*' && f:groupby('AG','GEN')"; - - // @formatter:off - Map expectedMap = ImmutableMap. builder() - .put("FEMALE-18", 2) - .put("MALE-30", 1) - .put("MALE-34", 1) - .put("MALE-16", 1) - .put("MALE-40", 2) - .put("MALE-20", 2) - .put("MALE-24", 1) - .put("MALE-22", 2) - .build(); - // @formatter:on - - for (RebuildingScannerTestHelper.TEARDOWN teardown : TEARDOWNS) { - for (RebuildingScannerTestHelper.INTERRUPT interrupt : INTERRUPTS) { - runTestQueryWithGrouping(expectedMap, queryString, startDate, endDate, extraParameters, teardown, interrupt); - } - } - } - - @Test - public void testGroupingUsingLuceneFunction() throws Exception { - Map extraParameters = new HashMap<>(); - extraParameters.put("group.fields.batch.size", "6"); - - Date startDate = format.parse("20091231"); - Date endDate = format.parse("20150101"); - - String queryString = "(UUID:C* or UUID:S* ) and #GROUPBY('AG','GEN')"; - - // @formatter:off - Map expectedMap = ImmutableMap. 
builder() - .put("FEMALE-18", 2) - .put("MALE-30", 1) - .put("MALE-34", 1) - .put("MALE-16", 1) - .put("MALE-40", 2) - .put("MALE-20", 2) - .put("MALE-24", 1) - .put("MALE-22", 2) - .build(); - // @formatter:on - logic.setParser(new LuceneToJexlQueryParser()); - for (RebuildingScannerTestHelper.TEARDOWN teardown : TEARDOWNS) { - for (RebuildingScannerTestHelper.INTERRUPT interrupt : INTERRUPTS) { - runTestQueryWithGrouping(expectedMap, queryString, startDate, endDate, extraParameters, teardown, interrupt); - } - } - logic.setParser(new JexlControlledQueryParser()); - } - - @Test - public void testCountingMap() { - MarkingFunctions markingFunctions = new MarkingFunctions.Default(); - GroupCountingHashMap map = new GroupCountingHashMap(markingFunctions); - GroupingTypeAttribute attr1 = new GroupingTypeAttribute(new LcType("FOO"), new Key("FOO"), true); - attr1.setColumnVisibility(new ColumnVisibility("A")); - map.add(Collections.singleton(attr1)); - - GroupingTypeAttribute attr2 = new GroupingTypeAttribute(new LcType("FOO"), new Key("FOO"), true); - attr2.setColumnVisibility(new ColumnVisibility("B")); - map.add(Collections.singleton(attr2)); - GroupingTypeAttribute attr3 = new GroupingTypeAttribute(new LcType("BAR"), new Key("BAR"), true); - attr3.setColumnVisibility(new ColumnVisibility("C")); - map.add(Collections.singleton(attr3)); - - log.debug("map is: " + map); - - for (Map.Entry>,Integer> entry : map.entrySet()) { - Attribute attr = entry.getKey().iterator().next(); // the first and only one - int count = entry.getValue(); - if (attr.getData().toString().equals("FOO")) { - Assert.assertEquals(2, count); - Assert.assertEquals(new ColumnVisibility("A&B"), attr.getColumnVisibility()); - } else if (attr.getData().toString().equals("BAR")) { - Assert.assertEquals(1, count); - Assert.assertEquals(new ColumnVisibility("C"), attr.getColumnVisibility()); - } - } - } - - @Test - public void testCountingMapAgain() { - MarkingFunctions markingFunctions = new MarkingFunctions.Default(); - GroupCountingHashMap map = new GroupCountingHashMap(markingFunctions); - - GroupingTypeAttribute attr1a = new GroupingTypeAttribute(new LcType("FOO"), new Key("NAME"), true); - attr1a.setColumnVisibility(new ColumnVisibility("A")); - GroupingTypeAttribute attr1b = new GroupingTypeAttribute(new NumberType("5"), new Key("AGE"), true); - attr1b.setColumnVisibility(new ColumnVisibility("C")); - Set> seta = Sets.newHashSet(attr1a, attr1b); - map.add(seta); - - GroupingTypeAttribute attr2a = new GroupingTypeAttribute(new LcType("FOO"), new Key("NAME"), true); - attr2a.setColumnVisibility(new ColumnVisibility("B")); - GroupingTypeAttribute attr2b = new GroupingTypeAttribute(new NumberType("5"), new Key("AGE"), true); - attr2b.setColumnVisibility(new ColumnVisibility("D")); - Set> setb = Sets.newHashSet(attr2a, attr2b); - map.add(setb); - - // even though the ColumnVisibilities are different, the 2 collections seta and setb are 'equal' and generate the same hashCode - Assert.assertEquals(seta.hashCode(), setb.hashCode()); - Assert.assertEquals(seta, setb); - - GroupingTypeAttribute attr3a = new GroupingTypeAttribute(new LcType("BAR"), new Key("NAME"), true); - attr3a.setColumnVisibility(new ColumnVisibility("C")); - GroupingTypeAttribute attr3b = new GroupingTypeAttribute(new NumberType("6"), new Key("AGE"), true); - attr3b.setColumnVisibility(new ColumnVisibility("D")); - map.add(Sets.newHashSet(attr3a, attr3b)); - - log.debug("map is: " + map); - - for (Map.Entry>,Integer> entry : map.entrySet()) { - for (Attribute 
attr : entry.getKey()) { - int count = entry.getValue(); - if (attr.getData().toString().equals("FOO")) { - Assert.assertEquals(2, count); - // the ColumnVisibility for the key was changed to the merged value of the 2 items that were added to the map - Assert.assertEquals(new ColumnVisibility("A&B"), attr.getColumnVisibility()); - } else if (attr.getData().toString().equals("5")) { - Assert.assertEquals(2, count); - // the ColumnVisibility for the key was changed to the merged value of the 2 items that were added to the map - Assert.assertEquals(new ColumnVisibility("C&D"), attr.getColumnVisibility()); - } else if (attr.getData().toString().equals("BAR")) { - Assert.assertEquals(1, count); - Assert.assertEquals(new ColumnVisibility("C"), attr.getColumnVisibility()); - } else if (attr.getData().toString().equals("6")) { - Assert.assertEquals(1, count); - Assert.assertEquals(new ColumnVisibility("D"), attr.getColumnVisibility()); - } - } - } - } -} diff --git a/warehouse/query-core/src/test/java/datawave/query/util/VisibilityWiseGuysIngest.java b/warehouse/query-core/src/test/java/datawave/query/util/VisibilityWiseGuysIngest.java index 262e901e28..7c09338c35 100644 --- a/warehouse/query-core/src/test/java/datawave/query/util/VisibilityWiseGuysIngest.java +++ b/warehouse/query-core/src/test/java/datawave/query/util/VisibilityWiseGuysIngest.java @@ -43,6 +43,10 @@ public enum WhatKindaRange { public static final String sopranoUID = UID.builder().newId("Soprano".getBytes(), (Date) null).toString(); public static final String caponeUID = UID.builder().newId("Capone".getBytes(), (Date) null).toString(); + public static void writeItAll(AccumuloClient client, String range) throws Exception { + writeItAll(client, WhatKindaRange.valueOf(range)); + } + public static void writeItAll(AccumuloClient client, WhatKindaRange range) throws Exception { BatchWriter bw = null; @@ -54,59 +58,59 @@ public static void writeItAll(AccumuloClient client, WhatKindaRange range) throw bw = client.createBatchWriter(TableName.SHARD, bwConfig); mutation = new Mutation(shard); - mutation.put(datatype + "\u0000" + corleoneUID, "NAME.0" + "\u0000" + "SANTINO", columnVisibilityItalian, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "NAME.1" + "\u0000" + "FREDO", columnVisibilityItalian, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "NAME.2" + "\u0000" + "MICHAEL", columnVisibilityItalian, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "NAME.3" + "\u0000" + "CONSTANZIA", columnVisibilityItalian, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "NAME.4" + "\u0000" + "LUCA", columnVisibilityItalian, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "NAME.5" + "\u0000" + "VINCENT", columnVisibilityItalian, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "GENDER.0" + "\u0000" + "MALE", columnVisibilityItalian, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "GENDER.1" + "\u0000" + "MALE", columnVisibilityItalian, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "GENDER.2" + "\u0000" + "MALE", columnVisibilityItalian, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "GENDER.3" + "\u0000" + "FEMALE", columnVisibilityItalian, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "GENDER.4" + "\u0000" + "MALE", columnVisibilityItalian, timeStamp, emptyValue); - mutation.put(datatype + 
"\u0000" + corleoneUID, "GENDER.5" + "\u0000" + "MALE", columnVisibilityItalian, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "AGE.0" + "\u0000" + "24", columnVisibilityItalian, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "AGE.1" + "\u0000" + "22", columnVisibilityItalian, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "AGE.2" + "\u0000" + "20", columnVisibilityItalian, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "AGE.3" + "\u0000" + "18", columnVisibilityItalian, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "AGE.4" + "\u0000" + "40", columnVisibilityItalian, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "AGE.5" + "\u0000" + "22", columnVisibilityItalian, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "BIRTHDAY.0" + "\u0000" + "1", columnVisibilityItalian, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "BIRTHDAY.1" + "\u0000" + "2", columnVisibilityItalian, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "BIRTHDAY.2" + "\u0000" + "3", columnVisibilityItalian, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "BIRTHDAY.3" + "\u0000" + "4", columnVisibilityItalian, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "BIRTHDAY.4" + "\u0000" + "5", columnVisibilityItalian, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "BIRTHDAY.5" + "\u0000" + "22", columnVisibilityItalian, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "UUID.0" + "\u0000" + "CORLEONE", columnVisibilityItalian, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "NAME.FOO.0" + "\u0000" + "SANTINO", columnVisibilityItalian, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "NAME.FOO.1" + "\u0000" + "FREDO", columnVisibilityItalian, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "NAME.FOO.2" + "\u0000" + "MICHAEL", columnVisibilityItalian, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "NAME.FOO.3" + "\u0000" + "CONSTANZIA", columnVisibilityItalian, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "NAME.FOO.4" + "\u0000" + "LUCA", columnVisibilityItalian, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "NAME.FOO.5" + "\u0000" + "VINCENT", columnVisibilityItalian, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "GENDER.FOO.0" + "\u0000" + "MALE", columnVisibilityItalian, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "GENDER.FOO.1" + "\u0000" + "MALE", columnVisibilityItalian, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "GENDER.FOO.2" + "\u0000" + "MALE", columnVisibilityItalian, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "GENDER.FOO.3" + "\u0000" + "FEMALE", columnVisibilityItalian, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "GENDER.FOO.4" + "\u0000" + "MALE", columnVisibilityItalian, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "GENDER.FOO.5" + "\u0000" + "MALE", columnVisibilityItalian, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "AGE.FOO.0" + "\u0000" + "24", columnVisibilityItalian, timeStamp, emptyValue); + mutation.put(datatype 
+ "\u0000" + corleoneUID, "AGE.FOO.1" + "\u0000" + "22", columnVisibilityItalian, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "AGE.FOO.2" + "\u0000" + "20", columnVisibilityItalian, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "AGE.FOO.3" + "\u0000" + "18", columnVisibilityItalian, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "AGE.FOO.4" + "\u0000" + "40", columnVisibilityItalian, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "AGE.FOO.5" + "\u0000" + "22", columnVisibilityItalian, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "BIRTHDAY.FOO.0" + "\u0000" + "1", columnVisibilityItalian, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "BIRTHDAY.FOO.1" + "\u0000" + "2", columnVisibilityItalian, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "BIRTHDAY.FOO.2" + "\u0000" + "3", columnVisibilityItalian, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "BIRTHDAY.FOO.3" + "\u0000" + "4", columnVisibilityItalian, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "BIRTHDAY.FOO.4" + "\u0000" + "5", columnVisibilityItalian, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "BIRTHDAY.FOO.5" + "\u0000" + "22", columnVisibilityItalian, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "UUID.FOO.0" + "\u0000" + "CORLEONE", columnVisibilityItalian, timeStamp, emptyValue); mutation.put(datatype + "\u0000" + corleoneUID, "RECORD" + "\u0000" + "1", columnVisibilityItalian, timeStamp, emptyValue); mutation.put(datatype + "\u0000" + corleoneUID, "RECORD" + "\u0000" + "2", columnVisibilityItalian, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + sopranoUID, "NAME.0" + "\u0000" + "ANTHONY", columnVisibilityEnglish, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + sopranoUID, "NAME.1" + "\u0000" + "MEADOW", columnVisibilityEnglish, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + sopranoUID, "GENDER.0" + "\u0000" + "MALE", columnVisibilityEnglish, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + sopranoUID, "GENDER.1" + "\u0000" + "FEMALE", columnVisibilityEnglish, timeStamp, emptyValue); - // to test whether singleton values correctly get matched using the function set methods, only add AGE.1 - // mutation.put(datatype + "\u0000" + sopranoUID, "AGE.0" + "\u0000" + "16", columnVisibility, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + sopranoUID, "AGE.0" + "\u0000" + "16", columnVisibilityEnglish, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + sopranoUID, "AGE.1" + "\u0000" + "18", columnVisibilityEnglish, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + sopranoUID, "UUID.0" + "\u0000" + "SOPRANO", columnVisibilityEnglish, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + sopranoUID, "NAME.FOO.0" + "\u0000" + "ANTHONY", columnVisibilityEnglish, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + sopranoUID, "NAME.FOO.1" + "\u0000" + "MEADOW", columnVisibilityEnglish, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + sopranoUID, "GENDER.FOO.0" + "\u0000" + "MALE", columnVisibilityEnglish, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + sopranoUID, "GENDER.FOO.1" + "\u0000" + "FEMALE", columnVisibilityEnglish, timeStamp, emptyValue); + // to test whether singleton values correctly get matched using 
the function set methods, only add AGE.FOO.1 + // mutation.put(datatype + "\u0000" + sopranoUID, "AGE.FOO.0" + "\u0000" + "16", columnVisibility, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + sopranoUID, "AGE.FOO.0" + "\u0000" + "16", columnVisibilityEnglish, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + sopranoUID, "AGE.FOO.1" + "\u0000" + "18", columnVisibilityEnglish, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + sopranoUID, "UUID.FOO.0" + "\u0000" + "SOPRANO", columnVisibilityEnglish, timeStamp, emptyValue); mutation.put(datatype + "\u0000" + sopranoUID, "RECORD" + "\u0000" + "1", columnVisibilityItalian, timeStamp, emptyValue); mutation.put(datatype + "\u0000" + sopranoUID, "RECORD" + "\u0000" + "2", columnVisibilityItalian, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + caponeUID, "NAME.0" + "\u0000" + "ALPHONSE", columnVisibilityEnglish, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + caponeUID, "NAME.1" + "\u0000" + "FRANK", columnVisibilityEnglish, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + caponeUID, "NAME.2" + "\u0000" + "RALPH", columnVisibilityEnglish, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + caponeUID, "NAME.3" + "\u0000" + "MICHAEL", columnVisibilityEnglish, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + caponeUID, "GENDER.0" + "\u0000" + "MALE", columnVisibilityEnglish, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + caponeUID, "GENDER.1" + "\u0000" + "MALE", columnVisibilityEnglish, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + caponeUID, "GENDER.2" + "\u0000" + "MALE", columnVisibilityEnglish, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + caponeUID, "GENDER.3" + "\u0000" + "MALE", columnVisibilityEnglish, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + caponeUID, "AGE.0" + "\u0000" + "30", columnVisibilityEnglish, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + caponeUID, "AGE.1" + "\u0000" + "34", columnVisibilityEnglish, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + caponeUID, "AGE.2" + "\u0000" + "20", columnVisibilityEnglish, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + caponeUID, "AGE.3" + "\u0000" + "40", columnVisibilityEnglish, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + caponeUID, "UUID.0" + "\u0000" + "CAPONE", columnVisibilityEnglish, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + caponeUID, "NAME.FOO.0" + "\u0000" + "ALPHONSE", columnVisibilityEnglish, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + caponeUID, "NAME.FOO.1" + "\u0000" + "FRANK", columnVisibilityEnglish, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + caponeUID, "NAME.FOO.2" + "\u0000" + "RALPH", columnVisibilityEnglish, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + caponeUID, "NAME.FOO.3" + "\u0000" + "MICHAEL", columnVisibilityEnglish, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + caponeUID, "GENDER.FOO.0" + "\u0000" + "MALE", columnVisibilityEnglish, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + caponeUID, "GENDER.FOO.1" + "\u0000" + "MALE", columnVisibilityEnglish, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + caponeUID, "GENDER.FOO.2" + "\u0000" + "MALE", columnVisibilityEnglish, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + caponeUID, "GENDER.FOO.3" + "\u0000" + "MALE", columnVisibilityEnglish, timeStamp, emptyValue); + 
mutation.put(datatype + "\u0000" + caponeUID, "AGE.FOO.0" + "\u0000" + "30", columnVisibilityEnglish, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + caponeUID, "AGE.FOO.1" + "\u0000" + "34", columnVisibilityEnglish, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + caponeUID, "AGE.FOO.2" + "\u0000" + "20", columnVisibilityEnglish, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + caponeUID, "AGE.FOO.3" + "\u0000" + "40", columnVisibilityEnglish, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + caponeUID, "UUID.FOO.0" + "\u0000" + "CAPONE", columnVisibilityEnglish, timeStamp, emptyValue); mutation.put(datatype + "\u0000" + caponeUID, "RECORD" + "\u0000" + "1", columnVisibilityItalian, timeStamp, emptyValue); mutation.put(datatype + "\u0000" + caponeUID, "RECORD" + "\u0000" + "2", columnVisibilityItalian, timeStamp, emptyValue); mutation.put(datatype + "\u0000" + caponeUID, "RECORD" + "\u0000" + "3", columnVisibilityItalian, timeStamp, emptyValue); diff --git a/warehouse/query-core/src/test/java/datawave/query/util/VisibilityWiseGuysIngestWithModel.java b/warehouse/query-core/src/test/java/datawave/query/util/VisibilityWiseGuysIngestWithModel.java index be2ffc3344..d38f646e98 100644 --- a/warehouse/query-core/src/test/java/datawave/query/util/VisibilityWiseGuysIngestWithModel.java +++ b/warehouse/query-core/src/test/java/datawave/query/util/VisibilityWiseGuysIngestWithModel.java @@ -67,6 +67,10 @@ protected static String normalizerForColumn(String column) { } } + public static void writeItAll(AccumuloClient client, String range) throws Exception { + writeItAll(client, WhatKindaRange.valueOf(range)); + } + public static void writeItAll(AccumuloClient client, WhatKindaRange range) throws Exception { BatchWriter bw = null; @@ -78,26 +82,26 @@ public static void writeItAll(AccumuloClient client, WhatKindaRange range) throw bw = client.createBatchWriter(TableName.SHARD, bwConfig); mutation = new Mutation(shard); - mutation.put(datatype + "\u0000" + corleoneUID, "NOME.0" + "\u0000" + "SANTINO", columnVisibilityItalian, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "NOME.1" + "\u0000" + "FREDO", columnVisibilityItalian, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "NOME.2" + "\u0000" + "MICHAEL", columnVisibilityItalian, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "NOME.3" + "\u0000" + "CONSTANZIA", columnVisibilityItalian, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "NOME.4" + "\u0000" + "LUCA", columnVisibilityItalian, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "NOME.5" + "\u0000" + "VINCENT", columnVisibilityItalian, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "GENERE.0" + "\u0000" + "MALE", columnVisibilityItalian, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "GENERE.1" + "\u0000" + "MALE", columnVisibilityItalian, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "GENERE.2" + "\u0000" + "MALE", columnVisibilityItalian, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "GENERE.3" + "\u0000" + "FEMALE", columnVisibilityItalian, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "GENERE.4" + "\u0000" + "MALE", columnVisibilityItalian, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "GENERE.5" + "\u0000" + "MALE", columnVisibilityItalian, timeStamp, 
emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "ETA.0" + "\u0000" + "24", columnVisibilityItalian, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "ETA.1" + "\u0000" + "22", columnVisibilityItalian, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "ETA.2" + "\u0000" + "20", columnVisibilityItalian, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "ETA.3" + "\u0000" + "18", columnVisibilityItalian, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "ETA.4" + "\u0000" + "40", columnVisibilityItalian, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "ETA.5" + "\u0000" + "22", columnVisibilityItalian, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "MAGIC.0" + "\u0000" + "18", columnVisibilityItalian, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "UUID.0" + "\u0000" + "CORLEONE", columnVisibilityItalian, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "NOME.FOO.0" + "\u0000" + "SANTINO", columnVisibilityItalian, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "NOME.FOO.1" + "\u0000" + "FREDO", columnVisibilityItalian, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "NOME.FOO.2" + "\u0000" + "MICHAEL", columnVisibilityItalian, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "NOME.FOO.3" + "\u0000" + "CONSTANZIA", columnVisibilityItalian, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "NOME.FOO.4" + "\u0000" + "LUCA", columnVisibilityItalian, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "NOME.FOO.5" + "\u0000" + "VINCENT", columnVisibilityItalian, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "GENERE.FOO.0" + "\u0000" + "MALE", columnVisibilityItalian, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "GENERE.FOO.1" + "\u0000" + "MALE", columnVisibilityItalian, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "GENERE.FOO.2" + "\u0000" + "MALE", columnVisibilityItalian, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "GENERE.FOO.3" + "\u0000" + "FEMALE", columnVisibilityItalian, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "GENERE.FOO.4" + "\u0000" + "MALE", columnVisibilityItalian, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "GENERE.FOO.5" + "\u0000" + "MALE", columnVisibilityItalian, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "ETA.FOO.0" + "\u0000" + "24", columnVisibilityItalian, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "ETA.FOO.1" + "\u0000" + "22", columnVisibilityItalian, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "ETA.FOO.2" + "\u0000" + "20", columnVisibilityItalian, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "ETA.FOO.3" + "\u0000" + "18", columnVisibilityItalian, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "ETA.FOO.4" + "\u0000" + "40", columnVisibilityItalian, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "ETA.FOO.5" + "\u0000" + "22", columnVisibilityItalian, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "MAGIC.FOO.0" + "\u0000" + "18", columnVisibilityItalian, timeStamp, 
emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "UUID.FOO.0" + "\u0000" + "CORLEONE", columnVisibilityItalian, timeStamp, emptyValue); // CORLEONE date delta is 70 years mutation.put(datatype + "\u0000" + corleoneUID, "BIRTH_DATE" + "\u0000" + "1930-12-28T00:00:05.000Z", columnVisibilityItalian, timeStamp, emptyValue); @@ -106,16 +110,16 @@ public static void writeItAll(AccumuloClient client, WhatKindaRange range) throw mutation.put(datatype + "\u0000" + corleoneUID, "QUOTE" + "\u0000" + "Im gonna make him an offer he cant refuse", columnVisibilityItalian, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + sopranoUID, "NAME.0" + "\u0000" + "ANTHONY", columnVisibilityEnglish, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + sopranoUID, "NAME.1" + "\u0000" + "MEADOW", columnVisibilityEnglish, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + sopranoUID, "GENDER.0" + "\u0000" + "MALE", columnVisibilityEnglish, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + sopranoUID, "GENDER.1" + "\u0000" + "FEMALE", columnVisibilityEnglish, timeStamp, emptyValue); - // to test whether singleton values correctly get matched using the function set methods, only add AGE.1 - // mutation.put(datatype + "\u0000" + sopranoUID, "AGE.0" + "\u0000" + "16", columnVisibility, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + sopranoUID, "AGE.0" + "\u0000" + "16", columnVisibilityEnglish, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + sopranoUID, "AGE.1" + "\u0000" + "18", columnVisibilityEnglish, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + sopranoUID, "MAGIC.0" + "\u0000" + "18", columnVisibilityEnglish, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + sopranoUID, "UUID.0" + "\u0000" + "SOPRANO", columnVisibilityEnglish, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + sopranoUID, "NAME.FOO.0" + "\u0000" + "ANTHONY", columnVisibilityEnglish, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + sopranoUID, "NAME.FOO.1" + "\u0000" + "MEADOW", columnVisibilityEnglish, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + sopranoUID, "GENDER.FOO.0" + "\u0000" + "MALE", columnVisibilityEnglish, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + sopranoUID, "GENDER.FOO.1" + "\u0000" + "FEMALE", columnVisibilityEnglish, timeStamp, emptyValue); + // to test whether singleton values correctly get matched using the function set methods, only add AGE.FOO.1 + // mutation.put(datatype + "\u0000" + sopranoUID, "AGE.FOO.0" + "\u0000" + "16", columnVisibility, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + sopranoUID, "AGE.FOO.0" + "\u0000" + "16", columnVisibilityEnglish, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + sopranoUID, "AGE.FOO.1" + "\u0000" + "18", columnVisibilityEnglish, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + sopranoUID, "MAGIC.FOO.0" + "\u0000" + "18", columnVisibilityEnglish, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + sopranoUID, "UUID.FOO.0" + "\u0000" + "SOPRANO", columnVisibilityEnglish, timeStamp, emptyValue); // soprano date delta is 50 years mutation.put(datatype + "\u0000" + sopranoUID, "BIRTH_DATE" + "\u0000" + "1950-12-28T00:00:05.000Z", columnVisibilityEnglish, timeStamp, emptyValue); @@ -124,28 +128,28 @@ public static void writeItAll(AccumuloClient client, WhatKindaRange range) throw mutation.put(datatype + "\u0000" + sopranoUID, "QUOTE" + "\u0000" + "If you can quote the rules then you 
can obey them", columnVisibilityEnglish, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + caponeUID, "NAME.0" + "\u0000" + "ALPHONSE", columnVisibilityEnglish, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + caponeUID, "NAME.1" + "\u0000" + "FRANK", columnVisibilityEnglish, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + caponeUID, "NAME.2" + "\u0000" + "RALPH", columnVisibilityEnglish, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + caponeUID, "NAME.3" + "\u0000" + "MICHAEL", columnVisibilityEnglish, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + caponeUID, "GENDER.0" + "\u0000" + "MALE", columnVisibilityEnglish, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + caponeUID, "GENDER.1" + "\u0000" + "MALE", columnVisibilityEnglish, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + caponeUID, "GENDER.2" + "\u0000" + "MALE", columnVisibilityEnglish, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + caponeUID, "GENDER.3" + "\u0000" + "MALE", columnVisibilityEnglish, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + caponeUID, "AGE.0" + "\u0000" + "30", columnVisibilityEnglish, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + caponeUID, "AGE.1" + "\u0000" + "34", columnVisibilityEnglish, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + caponeUID, "AGE.2" + "\u0000" + "20", columnVisibilityEnglish, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + caponeUID, "AGE.3" + "\u0000" + "40", columnVisibilityEnglish, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + caponeUID, "MAGIC.0" + "\u0000" + "18", columnVisibilityEnglish, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + caponeUID, "UUID.0" + "\u0000" + "CAPONE", columnVisibilityEnglish, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + caponeUID, "NAME.FOO.0" + "\u0000" + "ALPHONSE", columnVisibilityEnglish, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + caponeUID, "NAME.FOO.1" + "\u0000" + "FRANK", columnVisibilityEnglish, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + caponeUID, "NAME.FOO.2" + "\u0000" + "RALPH", columnVisibilityEnglish, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + caponeUID, "NAME.FOO.3" + "\u0000" + "MICHAEL", columnVisibilityEnglish, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + caponeUID, "GENDER.FOO.0" + "\u0000" + "MALE", columnVisibilityEnglish, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + caponeUID, "GENDER.FOO.1" + "\u0000" + "MALE", columnVisibilityEnglish, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + caponeUID, "GENDER.FOO.2" + "\u0000" + "MALE", columnVisibilityEnglish, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + caponeUID, "GENDER.FOO.3" + "\u0000" + "MALE", columnVisibilityEnglish, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + caponeUID, "AGE.FOO.0" + "\u0000" + "30", columnVisibilityEnglish, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + caponeUID, "AGE.FOO.1" + "\u0000" + "34", columnVisibilityEnglish, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + caponeUID, "AGE.FOO.2" + "\u0000" + "20", columnVisibilityEnglish, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + caponeUID, "AGE.FOO.3" + "\u0000" + "40", columnVisibilityEnglish, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + caponeUID, "MAGIC.FOO.0" + "\u0000" + "18", columnVisibilityEnglish, timeStamp, 
emptyValue);
+            mutation.put(datatype + "\u0000" + caponeUID, "UUID.FOO.0" + "\u0000" + "CAPONE", columnVisibilityEnglish, timeStamp, emptyValue);
             // capone date delta is 89 or 90 years
-            mutation.put(datatype + "\u0000" + caponeUID, "BIRTH_DATE.0" + "\u0000" + "1910-12-28T00:00:05.000Z", columnVisibilityEnglish, timeStamp,
+            mutation.put(datatype + "\u0000" + caponeUID, "BIRTH_DATE.FOO.0" + "\u0000" + "1910-12-28T00:00:05.000Z", columnVisibilityEnglish, timeStamp,
                             emptyValue);
             // add a second date to test function taking an Iterable
-            mutation.put(datatype + "\u0000" + caponeUID, "BIRTH_DATE.1" + "\u0000" + "1911-12-28T00:00:05.000Z", columnVisibilityEnglish, timeStamp,
+            mutation.put(datatype + "\u0000" + caponeUID, "BIRTH_DATE.FOO.1" + "\u0000" + "1911-12-28T00:00:05.000Z", columnVisibilityEnglish, timeStamp,
                             emptyValue);
-            mutation.put(datatype + "\u0000" + caponeUID, "DEATH_DATE.0" + "\u0000" + "2000-12-28T00:00:05.000Z", columnVisibilityEnglish, timeStamp,
+            mutation.put(datatype + "\u0000" + caponeUID, "DEATH_DATE.FOO.0" + "\u0000" + "2000-12-28T00:00:05.000Z", columnVisibilityEnglish, timeStamp,
                             emptyValue);
             mutation.put(datatype + "\u0000" + caponeUID, "QUOTE" + "\u0000" + "You can get much farther with a kind word and a gun than you can with a kind word alone",
diff --git a/warehouse/query-core/src/test/java/datawave/test/GroupAssert.java b/warehouse/query-core/src/test/java/datawave/test/GroupAssert.java
new file mode 100644
index 0000000000..23c4c7c2c2
--- /dev/null
+++ b/warehouse/query-core/src/test/java/datawave/test/GroupAssert.java
@@ -0,0 +1,113 @@
+package datawave.test;
+
+import java.math.BigDecimal;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Objects;
+
+import org.apache.accumulo.core.security.ColumnVisibility;
+import org.assertj.core.api.AbstractAssert;
+import org.assertj.core.api.Assertions;
+
+import datawave.query.attributes.Attribute;
+import datawave.query.common.grouping.AggregateOperation;
+import datawave.query.common.grouping.Aggregator;
+import datawave.query.common.grouping.FieldAggregator;
+import datawave.query.common.grouping.Group;
+import datawave.query.common.grouping.GroupingAttribute;
+
+public class GroupAssert extends AbstractAssert {
+
+    public static GroupAssert assertThat(Group group) {
+        return new GroupAssert(group);
+    }
+
+    protected GroupAssert(Group group) {
+        super(group, GroupAssert.class);
+    }
+
+    public GroupAssert hasCount(int count) {
+        isNotNull();
+        if (actual.getCount() != count) {
+            failWithMessage("Expected count to be %s but was %s", count, actual.getCount());
+        }
+        return this;
+    }
+
+    public GroupAssert hasVisibilitiesForKey(GroupingAttribute key, ColumnVisibility... visibilities) {
+        isNotNull();
+        Collection actualVisibilities = actual.getVisibilitiesForAttribute(key);
+        Assertions.assertThat(actualVisibilities).isNotNull().withFailMessage("Expected column visibilities for %s to contain exactly %s but was %s", key,
+                        Arrays.toString(visibilities), actualVisibilities).containsExactlyInAnyOrder(visibilities);
+        return this;
+    }
+
+    public GroupAssert hasDocumentVisibilities(ColumnVisibility...
visibilities) { + isNotNull(); + Collection actualVisibilities = actual.getDocumentVisibilities(); + Assertions.assertThat(actualVisibilities).isNotNull() + .withFailMessage("Expected document visibilities to contain exactly %s but was %s", Arrays.toString(visibilities), actualVisibilities) + .containsExactlyInAnyOrder(visibilities); + return this; + } + + public GroupAssert hasAggregatedSum(String field, BigDecimal sum) { + return hasAggregation(field, AggregateOperation.SUM, sum); + } + + public GroupAssert hasAggregatedMax(String field, Object data) { + assertAggregatedAttributeData(field, AggregateOperation.MAX, data); + return this; + } + + public GroupAssert hasAggregatedMin(String field, Object data) { + assertAggregatedAttributeData(field, AggregateOperation.MIN, data); + return this; + } + + private void assertAggregatedAttributeData(String field, AggregateOperation operation, Object data) { + Object aggregation = getAggregation(field, operation); + if (aggregation != null) { + Object actualData = ((Attribute) aggregation).getData(); + if (!Objects.equals(data, actualData)) { + failWithMessage("Expected %s for field %s to be %s but was %s", operation, field, data, actualData); + } + } else { + failWithMessage("Expected %s for %s to not be null", operation, field); + } + } + + public GroupAssert hasAggregatedCount(String field, long count) { + return hasAggregation(field, AggregateOperation.COUNT, count); + } + + public GroupAssert hasAggregatedAverage(String field, BigDecimal average) { + return hasAggregation(field, AggregateOperation.AVERAGE, average); + } + + public GroupAssert hasAggregation(String field, AggregateOperation operation, Object aggregation) { + Object actualAggregation = getAggregation(field, operation); + if (!Objects.equals(aggregation, actualAggregation)) { + failWithMessage("Expected %s for field %s to be %s but was %s", operation, field, aggregation, actualAggregation); + } + return this; + } + + private Object getAggregation(String field, AggregateOperation operation) { + isNotNull(); + FieldAggregator fieldAggregator = actual.getFieldAggregator(); + + if (fieldAggregator != null) { + Aggregator aggregator = fieldAggregator.getAggregator(field, operation); + if (aggregator != null) { + return aggregator.getAggregation(); + } else { + failWithMessage("No %s aggregator found for %s", operation, field); + } + } else { + failWithMessage("Expected field aggregator to not be null"); + } + return this; + } + +} diff --git a/warehouse/query-core/src/test/java/datawave/test/GroupsAssert.java b/warehouse/query-core/src/test/java/datawave/test/GroupsAssert.java new file mode 100644 index 0000000000..6de3829d0e --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/test/GroupsAssert.java @@ -0,0 +1,42 @@ +package datawave.test; + +import java.util.Arrays; +import java.util.Set; + +import org.assertj.core.api.AbstractAssert; +import org.assertj.core.util.Sets; + +import datawave.query.common.grouping.Grouping; +import datawave.query.common.grouping.GroupingAttribute; +import datawave.query.common.grouping.Groups; + +public class GroupsAssert extends AbstractAssert { + + public static GroupsAssert assertThat(Groups groups) { + return new GroupsAssert(groups); + } + + protected GroupsAssert(Groups groups) { + super(groups, GroupsAssert.class); + } + + public GroupsAssert hasTotalGroups(int total) { + isNotNull(); + if (total != actual.totalGroups()) { + failWithMessage("Expected %s total groups, but was %s", total, actual.totalGroups()); + } + return this; + } + 
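+    /**
+     * Collects the given key elements into a {@link Grouping}, fetches the matching group from the underlying {@link Groups}, and returns a
+     * {@link GroupAssert} over it so further assertions can be chained.
+     */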
+ public GroupAssert assertGroup(GroupingAttribute... keyElements) { + isNotNull(); + Grouping grouping = new Grouping(); + grouping.addAll(Arrays.asList(keyElements)); + return GroupAssert.assertThat(actual.getGroup(grouping)); + } + + public GroupAssert assertGroup(Grouping grouping) { + isNotNull(); + return GroupAssert.assertThat(actual.getGroup(grouping)); + } +} diff --git a/warehouse/query-core/src/test/resources/datawave/query/QueryLogicFactory.xml b/warehouse/query-core/src/test/resources/datawave/query/QueryLogicFactory.xml index 188c0ec4ca..474c17aa34 100644 --- a/warehouse/query-core/src/test/resources/datawave/query/QueryLogicFactory.xml +++ b/warehouse/query-core/src/test/resources/datawave/query/QueryLogicFactory.xml @@ -45,6 +45,11 @@ + + + + + diff --git a/web-services/deploy/configuration/src/main/resources/datawave/query/QueryLogicFactory.xml b/web-services/deploy/configuration/src/main/resources/datawave/query/QueryLogicFactory.xml index 2a93416484..fb142291aa 100644 --- a/web-services/deploy/configuration/src/main/resources/datawave/query/QueryLogicFactory.xml +++ b/web-services/deploy/configuration/src/main/resources/datawave/query/QueryLogicFactory.xml @@ -62,6 +62,11 @@ + + + + + diff --git a/microservices/pom.xml b/microservices/pom.xml index 9280a30219..e36c5b7a4c 100644 --- a/microservices/pom.xml +++ b/microservices/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-parent - 6.1.0-SNAPSHOT + 6.2.0-SNAPSHOT gov.nsa.datawave.microservice datawave-microservice-build-parent diff --git a/microservices/services/pom.xml b/microservices/services/pom.xml index 96e2ae41ab..a25ba38643 100644 --- a/microservices/services/pom.xml +++ b/microservices/services/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.microservice datawave-microservice-build-parent - 6.1.0-SNAPSHOT + 6.2.0-SNAPSHOT datawave-microservice-service-build-parent pom diff --git a/microservices/starters/pom.xml b/microservices/starters/pom.xml index 92a1c9da98..aafabbe941 100644 --- a/microservices/starters/pom.xml +++ b/microservices/starters/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.microservice datawave-microservice-build-parent - 6.1.0-SNAPSHOT + 6.2.0-SNAPSHOT datawave-microservice-starter-build-parent pom diff --git a/pom.xml b/pom.xml index 1199d6dd51..f6fb0a3ee7 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ 4.0.0 gov.nsa.datawave datawave-parent - 6.1.0-SNAPSHOT + 6.2.0-SNAPSHOT pom DataWave DataWave is a Java-based ingest and query framework that leverages Apache Accumulo to provide fast, secure access to your data. 
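A caller's-eye sketch of the new datawave.test.GroupsAssert and GroupAssert helpers introduced above may help clarify how they are meant to be chained in grouping tests. The sketch assumes a Groups instance is available from the grouping/aggregation code under test and that GroupingAttribute exposes a (type, key, keep) constructor analogous to the GroupingTypeAttribute used in the removed GroupingTestWithModel test; neither assumption is established by this patch, and the field names and expected values are illustrative only.

import java.math.BigDecimal;

import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.security.ColumnVisibility;

import datawave.data.type.LcType;
import datawave.query.common.grouping.GroupingAttribute;
import datawave.query.common.grouping.Groups;
import datawave.test.GroupsAssert;

public class GroupingAssertionSketch {

    // 'groups' is hypothetical here; in a real test it would be produced by the grouping logic under test.
    void verifyGroups(Groups groups) {
        // Assumed constructor, mirroring new GroupingTypeAttribute(new LcType("MALE"), new Key("GENDER"), true).
        GroupingAttribute gender = new GroupingAttribute(new LcType("MALE"), new Key("GENDER"), true);
        GroupingAttribute age = new GroupingAttribute(new LcType("20"), new Key("AGE"), true);

        // Top-level check on the number of distinct groups.
        GroupsAssert.assertThat(groups).hasTotalGroups(8);

        // Drill into one group and chain the per-group assertions added by this patch.
        GroupsAssert.assertThat(groups)
                        .assertGroup(gender, age)
                        .hasCount(2)
                        .hasAggregatedCount("NAME", 2L)
                        .hasAggregatedSum("AGE", new BigDecimal("40"))
                        .hasDocumentVisibilities(new ColumnVisibility("ALL"), new ColumnVisibility("E"));
    }
}

The chained style mirrors AssertJ's AbstractAssert conventions, which both helper classes extend.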
diff --git a/warehouse/accumulo-extensions/pom.xml b/warehouse/accumulo-extensions/pom.xml index 78bb8e50a0..3932405612 100644 --- a/warehouse/accumulo-extensions/pom.xml +++ b/warehouse/accumulo-extensions/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.1.0-SNAPSHOT + 6.2.0-SNAPSHOT datawave-accumulo-extensions ${project.artifactId} diff --git a/warehouse/assemble/datawave/pom.xml b/warehouse/assemble/datawave/pom.xml index 6ac549a5eb..78c60a7308 100644 --- a/warehouse/assemble/datawave/pom.xml +++ b/warehouse/assemble/datawave/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave assemble-parent - 6.1.0-SNAPSHOT + 6.2.0-SNAPSHOT assemble-datawave pom diff --git a/warehouse/assemble/pom.xml b/warehouse/assemble/pom.xml index c89dbba081..c47bd27672 100644 --- a/warehouse/assemble/pom.xml +++ b/warehouse/assemble/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.1.0-SNAPSHOT + 6.2.0-SNAPSHOT assemble-parent pom diff --git a/warehouse/assemble/webservice/pom.xml b/warehouse/assemble/webservice/pom.xml index cbcfd5bd74..472697b007 100644 --- a/warehouse/assemble/webservice/pom.xml +++ b/warehouse/assemble/webservice/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave assemble-parent - 6.1.0-SNAPSHOT + 6.2.0-SNAPSHOT assemble-webservice ${project.artifactId} diff --git a/warehouse/common/pom.xml b/warehouse/common/pom.xml index 8ec915c26a..1120d8779b 100644 --- a/warehouse/common/pom.xml +++ b/warehouse/common/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.1.0-SNAPSHOT + 6.2.0-SNAPSHOT datawave-common ${project.artifactId} diff --git a/warehouse/core/pom.xml b/warehouse/core/pom.xml index 87887b42ff..39033caea7 100644 --- a/warehouse/core/pom.xml +++ b/warehouse/core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.1.0-SNAPSHOT + 6.2.0-SNAPSHOT datawave-core jar diff --git a/warehouse/data-dictionary-core/pom.xml b/warehouse/data-dictionary-core/pom.xml index bf1e853ed7..08a9504422 100644 --- a/warehouse/data-dictionary-core/pom.xml +++ b/warehouse/data-dictionary-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.1.0-SNAPSHOT + 6.2.0-SNAPSHOT datawave-data-dictionary-core jar diff --git a/warehouse/edge-dictionary-core/pom.xml b/warehouse/edge-dictionary-core/pom.xml index bccd2109cf..0e66a8adfc 100644 --- a/warehouse/edge-dictionary-core/pom.xml +++ b/warehouse/edge-dictionary-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.1.0-SNAPSHOT + 6.2.0-SNAPSHOT datawave-edge-dictionary-core jar diff --git a/warehouse/edge-model-configuration-core/pom.xml b/warehouse/edge-model-configuration-core/pom.xml index 8d1df6c2d9..02c2f1c0c5 100644 --- a/warehouse/edge-model-configuration-core/pom.xml +++ b/warehouse/edge-model-configuration-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.1.0-SNAPSHOT + 6.2.0-SNAPSHOT datawave-edge-model-configuration-core jar diff --git a/warehouse/index-stats/pom.xml b/warehouse/index-stats/pom.xml index 3ffa47f68d..48dc0d24db 100644 --- a/warehouse/index-stats/pom.xml +++ b/warehouse/index-stats/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.1.0-SNAPSHOT + 6.2.0-SNAPSHOT datawave-index-stats jar diff --git a/warehouse/ingest-configuration/pom.xml b/warehouse/ingest-configuration/pom.xml index e0a7a8ebbc..bfa7ae2aef 100644 --- a/warehouse/ingest-configuration/pom.xml +++ b/warehouse/ingest-configuration/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.1.0-SNAPSHOT + 6.2.0-SNAPSHOT 
datawave-ingest-configuration diff --git a/warehouse/ingest-core/pom.xml b/warehouse/ingest-core/pom.xml index fd5b5bc5a5..c85cd46e7b 100644 --- a/warehouse/ingest-core/pom.xml +++ b/warehouse/ingest-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.1.0-SNAPSHOT + 6.2.0-SNAPSHOT datawave-ingest-core jar diff --git a/warehouse/ingest-csv/pom.xml b/warehouse/ingest-csv/pom.xml index 1de3661c48..469c5a3edc 100644 --- a/warehouse/ingest-csv/pom.xml +++ b/warehouse/ingest-csv/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.1.0-SNAPSHOT + 6.2.0-SNAPSHOT datawave-ingest-csv jar diff --git a/warehouse/ingest-json/pom.xml b/warehouse/ingest-json/pom.xml index 367c59a6f6..071865d3e2 100644 --- a/warehouse/ingest-json/pom.xml +++ b/warehouse/ingest-json/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.1.0-SNAPSHOT + 6.2.0-SNAPSHOT datawave-ingest-json jar diff --git a/warehouse/ingest-nyctlc/pom.xml b/warehouse/ingest-nyctlc/pom.xml index 8a6d357f87..615f06435d 100644 --- a/warehouse/ingest-nyctlc/pom.xml +++ b/warehouse/ingest-nyctlc/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.1.0-SNAPSHOT + 6.2.0-SNAPSHOT datawave-ingest-nyctlc jar diff --git a/warehouse/ingest-scripts/pom.xml b/warehouse/ingest-scripts/pom.xml index 85b823759e..f5fa216a2a 100644 --- a/warehouse/ingest-scripts/pom.xml +++ b/warehouse/ingest-scripts/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.1.0-SNAPSHOT + 6.2.0-SNAPSHOT datawave-ingest-scripts ${project.artifactId} diff --git a/warehouse/ingest-wikipedia/pom.xml b/warehouse/ingest-wikipedia/pom.xml index dbd2af856e..fa8b597b91 100644 --- a/warehouse/ingest-wikipedia/pom.xml +++ b/warehouse/ingest-wikipedia/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.1.0-SNAPSHOT + 6.2.0-SNAPSHOT datawave-ingest-wikipedia jar diff --git a/warehouse/metrics-core/pom.xml b/warehouse/metrics-core/pom.xml index c925612906..106b05001a 100644 --- a/warehouse/metrics-core/pom.xml +++ b/warehouse/metrics-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.1.0-SNAPSHOT + 6.2.0-SNAPSHOT datawave-metrics-core jar diff --git a/warehouse/ops-tools/config-compare/pom.xml b/warehouse/ops-tools/config-compare/pom.xml index 76ecac4778..83cc229db2 100644 --- a/warehouse/ops-tools/config-compare/pom.xml +++ b/warehouse/ops-tools/config-compare/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-ops-tools-parent - 6.1.0-SNAPSHOT + 6.2.0-SNAPSHOT datawave-ops-tools-config-compare diff --git a/warehouse/ops-tools/index-validation/pom.xml b/warehouse/ops-tools/index-validation/pom.xml index a4d762269b..822e8f2137 100644 --- a/warehouse/ops-tools/index-validation/pom.xml +++ b/warehouse/ops-tools/index-validation/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-ops-tools-parent - 6.1.0-SNAPSHOT + 6.2.0-SNAPSHOT datawave-ops-tools-index-validation jar diff --git a/warehouse/ops-tools/pom.xml b/warehouse/ops-tools/pom.xml index 7a96772ae3..7d4c0307b4 100644 --- a/warehouse/ops-tools/pom.xml +++ b/warehouse/ops-tools/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.1.0-SNAPSHOT + 6.2.0-SNAPSHOT datawave-ops-tools-parent pom diff --git a/warehouse/pom.xml b/warehouse/pom.xml index cf5997413d..8f4e14cdd5 100644 --- a/warehouse/pom.xml +++ b/warehouse/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-parent - 6.1.0-SNAPSHOT + 6.2.0-SNAPSHOT datawave-warehouse-parent pom diff --git a/warehouse/query-core/pom.xml 
b/warehouse/query-core/pom.xml index b503c334e9..9051a0c515 100644 --- a/warehouse/query-core/pom.xml +++ b/warehouse/query-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.1.0-SNAPSHOT + 6.2.0-SNAPSHOT datawave-query-core jar diff --git a/warehouse/regression-testing/pom.xml b/warehouse/regression-testing/pom.xml index f93d56e7a1..46c31eefdd 100644 --- a/warehouse/regression-testing/pom.xml +++ b/warehouse/regression-testing/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.1.0-SNAPSHOT + 6.2.0-SNAPSHOT datawave-regression-testing ${project.artifactId} diff --git a/web-services/accumulo/pom.xml b/web-services/accumulo/pom.xml index f7490c1af5..5c231fe4bb 100644 --- a/web-services/accumulo/pom.xml +++ b/web-services/accumulo/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.1.0-SNAPSHOT + 6.2.0-SNAPSHOT datawave-ws-accumulo ejb diff --git a/web-services/atom/pom.xml b/web-services/atom/pom.xml index 20ea43faea..d376a08f2c 100644 --- a/web-services/atom/pom.xml +++ b/web-services/atom/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.1.0-SNAPSHOT + 6.2.0-SNAPSHOT datawave-ws-atom ejb diff --git a/web-services/cached-results/pom.xml b/web-services/cached-results/pom.xml index 2200dcfaa1..49940b838e 100644 --- a/web-services/cached-results/pom.xml +++ b/web-services/cached-results/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.1.0-SNAPSHOT + 6.2.0-SNAPSHOT datawave-ws-cached-results ejb diff --git a/web-services/client/pom.xml b/web-services/client/pom.xml index 15e432e4f7..183ffbc9fb 100644 --- a/web-services/client/pom.xml +++ b/web-services/client/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.1.0-SNAPSHOT + 6.2.0-SNAPSHOT datawave-ws-client jar diff --git a/web-services/common-util/pom.xml b/web-services/common-util/pom.xml index ccea4c2d5f..727136df2e 100644 --- a/web-services/common-util/pom.xml +++ b/web-services/common-util/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.1.0-SNAPSHOT + 6.2.0-SNAPSHOT datawave-ws-common-util jar diff --git a/web-services/common/pom.xml b/web-services/common/pom.xml index 4fe4e31991..c76f78a65d 100644 --- a/web-services/common/pom.xml +++ b/web-services/common/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.1.0-SNAPSHOT + 6.2.0-SNAPSHOT datawave-ws-common ejb diff --git a/web-services/deploy/application/pom.xml b/web-services/deploy/application/pom.xml index 0c872b820b..1ea3bf96b6 100644 --- a/web-services/deploy/application/pom.xml +++ b/web-services/deploy/application/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-deploy-parent - 6.1.0-SNAPSHOT + 6.2.0-SNAPSHOT datawave-ws-deploy-application ear diff --git a/web-services/deploy/configuration/pom.xml b/web-services/deploy/configuration/pom.xml index 89644c0829..550641f0cf 100644 --- a/web-services/deploy/configuration/pom.xml +++ b/web-services/deploy/configuration/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-deploy-parent - 6.1.0-SNAPSHOT + 6.2.0-SNAPSHOT datawave-ws-deploy-configuration jar diff --git a/web-services/deploy/docs/pom.xml b/web-services/deploy/docs/pom.xml index 07c40dc1b6..fd39ec227f 100644 --- a/web-services/deploy/docs/pom.xml +++ b/web-services/deploy/docs/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-deploy-parent - 6.1.0-SNAPSHOT + 6.2.0-SNAPSHOT datawave-ws-deploy-docs war diff --git a/web-services/deploy/pom.xml 
b/web-services/deploy/pom.xml index 5e11647e85..a06c2353cc 100644 --- a/web-services/deploy/pom.xml +++ b/web-services/deploy/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.1.0-SNAPSHOT + 6.2.0-SNAPSHOT gov.nsa.datawave.webservices datawave-ws-deploy-parent diff --git a/web-services/deploy/spring-framework-integration/pom.xml b/web-services/deploy/spring-framework-integration/pom.xml index 2c86de1292..be7c3fba17 100644 --- a/web-services/deploy/spring-framework-integration/pom.xml +++ b/web-services/deploy/spring-framework-integration/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-deploy-parent - 6.1.0-SNAPSHOT + 6.2.0-SNAPSHOT spring-framework-integration ${project.artifactId} diff --git a/web-services/dictionary/pom.xml b/web-services/dictionary/pom.xml index 22311b355b..cf464ebdb3 100644 --- a/web-services/dictionary/pom.xml +++ b/web-services/dictionary/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.1.0-SNAPSHOT + 6.2.0-SNAPSHOT datawave-ws-dictionary ejb diff --git a/web-services/examples/client-login/pom.xml b/web-services/examples/client-login/pom.xml index 7ab9b72a37..33b61757aa 100644 --- a/web-services/examples/client-login/pom.xml +++ b/web-services/examples/client-login/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-examples-parent - 6.1.0-SNAPSHOT + 6.2.0-SNAPSHOT datawave-ws-examples-client-login ejb diff --git a/web-services/examples/http-client/pom.xml b/web-services/examples/http-client/pom.xml index 5d45b143dc..dd881d6bbe 100644 --- a/web-services/examples/http-client/pom.xml +++ b/web-services/examples/http-client/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-examples-parent - 6.1.0-SNAPSHOT + 6.2.0-SNAPSHOT datawave-ws-examples-http-client jar diff --git a/web-services/examples/jms-client/pom.xml b/web-services/examples/jms-client/pom.xml index c15f201061..16b4062827 100644 --- a/web-services/examples/jms-client/pom.xml +++ b/web-services/examples/jms-client/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-examples-parent - 6.1.0-SNAPSHOT + 6.2.0-SNAPSHOT datawave-ws-examples-jms-client jar diff --git a/web-services/examples/pom.xml b/web-services/examples/pom.xml index a2f39c5b25..b2f96c2db1 100644 --- a/web-services/examples/pom.xml +++ b/web-services/examples/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.1.0-SNAPSHOT + 6.2.0-SNAPSHOT datawave-ws-examples-parent pom diff --git a/web-services/examples/query-war/pom.xml b/web-services/examples/query-war/pom.xml index 235a50f9f9..99df0b58dc 100644 --- a/web-services/examples/query-war/pom.xml +++ b/web-services/examples/query-war/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-examples-parent - 6.1.0-SNAPSHOT + 6.2.0-SNAPSHOT datawave-ws-examples-query-war war diff --git a/web-services/map-reduce-embedded/pom.xml b/web-services/map-reduce-embedded/pom.xml index 5fab4871e8..fd28d8a678 100644 --- a/web-services/map-reduce-embedded/pom.xml +++ b/web-services/map-reduce-embedded/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.1.0-SNAPSHOT + 6.2.0-SNAPSHOT datawave-ws-map-reduce-embedded jar diff --git a/web-services/map-reduce-status/pom.xml b/web-services/map-reduce-status/pom.xml index bdfb1ce16e..1f6a3cc05f 100644 --- a/web-services/map-reduce-status/pom.xml +++ b/web-services/map-reduce-status/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.1.0-SNAPSHOT + 6.2.0-SNAPSHOT 
datawave-ws-map-reduce-status ejb diff --git a/web-services/map-reduce/pom.xml b/web-services/map-reduce/pom.xml index af60f5641d..6fcd98a75f 100644 --- a/web-services/map-reduce/pom.xml +++ b/web-services/map-reduce/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.1.0-SNAPSHOT + 6.2.0-SNAPSHOT datawave-ws-map-reduce ejb diff --git a/web-services/model/pom.xml b/web-services/model/pom.xml index 616a9c2a1e..525527b6a4 100644 --- a/web-services/model/pom.xml +++ b/web-services/model/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.1.0-SNAPSHOT + 6.2.0-SNAPSHOT datawave-ws-model ejb diff --git a/web-services/modification/pom.xml b/web-services/modification/pom.xml index c16f11cf60..3a83535b59 100644 --- a/web-services/modification/pom.xml +++ b/web-services/modification/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.1.0-SNAPSHOT + 6.2.0-SNAPSHOT datawave-ws-modification ejb diff --git a/web-services/pom.xml b/web-services/pom.xml index f738f7c154..193861d42c 100644 --- a/web-services/pom.xml +++ b/web-services/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-parent - 6.1.0-SNAPSHOT + 6.2.0-SNAPSHOT gov.nsa.datawave.webservices datawave-ws-parent diff --git a/web-services/query-websocket/pom.xml b/web-services/query-websocket/pom.xml index 5fe144ebd4..e6b26792de 100644 --- a/web-services/query-websocket/pom.xml +++ b/web-services/query-websocket/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.1.0-SNAPSHOT + 6.2.0-SNAPSHOT datawave-ws-query-websocket war diff --git a/web-services/query/pom.xml b/web-services/query/pom.xml index e6e9eb38d6..ba56c57529 100644 --- a/web-services/query/pom.xml +++ b/web-services/query/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.1.0-SNAPSHOT + 6.2.0-SNAPSHOT datawave-ws-query ejb diff --git a/web-services/rest-api/pom.xml b/web-services/rest-api/pom.xml index 452e5182c8..b6b42df740 100644 --- a/web-services/rest-api/pom.xml +++ b/web-services/rest-api/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.1.0-SNAPSHOT + 6.2.0-SNAPSHOT datawave-ws-rest-api war diff --git a/web-services/security/pom.xml b/web-services/security/pom.xml index 4c47217b06..2c6b9670e9 100644 --- a/web-services/security/pom.xml +++ b/web-services/security/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.1.0-SNAPSHOT + 6.2.0-SNAPSHOT datawave-ws-security ejb diff --git a/web-services/web-root/pom.xml b/web-services/web-root/pom.xml index 0507485ebf..8897003cf4 100644 --- a/web-services/web-root/pom.xml +++ b/web-services/web-root/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.1.0-SNAPSHOT + 6.2.0-SNAPSHOT datawave-ws-web-root war From eb111672c3587b0a41e2c0e4776f1ee707b688e3 Mon Sep 17 00:00:00 2001 From: Moriarty <22225248+apmoriarty@users.noreply.github.com> Date: Tue, 14 Nov 2023 12:23:37 -0500 Subject: [PATCH 12/32] Add visitor that prunes query terms based on ingest types (#2141) * Add visitor that prunes query terms based on ingest types * Integrate IngestTypePruningVisitor into the RangeStream and DefaultQueryPlanner Fixed queries that contained arithmetic or identifier fields. 
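The pruning rule is easiest to see outside of the JEXL machinery. The following self-contained sketch is illustrative only (the class name and the field-to-ingest-type map are invented here; the real visitor walks the query tree and pulls ingest types from TypeMetadata): an intersection is only viable for the ingest types shared by all of its terms, and a term nested under a union is dropped when its ingest types do not overlap that shared set.

import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

public class IngestTypePruneSketch {

    // Invented mappings for illustration: A and B are ingestType1 fields, C is ingestType2 only.
    static final Map<String, Set<String>> TYPES = Map.of(
            "A", Set.of("ingestType1"),
            "B", Set.of("ingestType1"),
            "C", Set.of("ingestType2"));

    // AND rule: a document has a single ingest type, so an intersection is satisfiable
    // only for the ingest types common to every child term. Empty means prune the whole AND.
    static Set<String> andTypes(List<Set<String>> childTypes) {
        Set<String> common = null;
        for (Set<String> types : childTypes) {
            if (common == null) {
                common = new HashSet<>(types);
            } else {
                common.retainAll(types);
            }
        }
        return common == null ? Set.of() : common;
    }

    public static void main(String[] args) {
        // (A AND C): {ingestType1} has nothing in common with {ingestType2} -> prune to zero terms
        System.out.println(andTypes(List.of(TYPES.get("A"), TYPES.get("C"))));      // []

        // (A AND (B OR C)): the union contributes the union of its children's types,
        // the intersection with A leaves {ingestType1}, and the C term no longer
        // overlaps that set, so it is pruned from the nested union leaving (A AND B).
        Set<String> unionTypes = new HashSet<>(TYPES.get("B"));
        unionTypes.addAll(TYPES.get("C"));
        Set<String> viable = andTypes(List.of(TYPES.get("A"), unionTypes));
        System.out.println(viable);                                                  // [ingestType1]
        System.out.println(!Collections.disjoint(TYPES.get("C"), viable));           // false -> prune C
    }
}

The visitor added below also handles query property markers, JEXL functions, negations, and fields with no ingest type entry (treated as UNKNOWN_TYPE so they never drive a prune), none of which this sketch attempts to model.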
--- .../query/config/ShardQueryConfiguration.java | 11 + .../query/index/lookup/RangeStream.java | 59 ++- .../visitors/IngestTypePruningVisitor.java | 471 +++++++++++++++++ .../query/planner/DefaultQueryPlanner.java | 21 + .../query/tables/ShardQueryLogic.java | 8 + .../config/ShardQueryConfigurationTest.java | 2 + .../IngestTypePruningVisitorTest.java | 491 ++++++++++++++++++ .../datawave/query/QueryLogicFactory.xml | 2 + 8 files changed, 1057 insertions(+), 8 deletions(-) create mode 100644 warehouse/query-core/src/main/java/datawave/query/jexl/visitors/IngestTypePruningVisitor.java create mode 100644 warehouse/query-core/src/test/java/datawave/query/jexl/visitors/IngestTypePruningVisitorTest.java diff --git a/warehouse/query-core/src/main/java/datawave/query/config/ShardQueryConfiguration.java b/warehouse/query-core/src/main/java/datawave/query/config/ShardQueryConfiguration.java index 3c8bd932ce..2ddbea1668 100644 --- a/warehouse/query-core/src/main/java/datawave/query/config/ShardQueryConfiguration.java +++ b/warehouse/query-core/src/main/java/datawave/query/config/ShardQueryConfiguration.java @@ -100,6 +100,8 @@ public class ShardQueryConfiguration extends GenericQueryConfiguration implement private int collapseUidsThreshold = -1; // Should this query dedupe terms within ANDs and ORs private boolean enforceUniqueTermsWithinExpressions = false; + // should this query attempt to prune terms via their ingest types + private boolean pruneQueryByIngestTypes = false; // should this query reduce the set of fields prior to serialization private boolean reduceQueryFields = false; private boolean reduceTypeMetadata = false; @@ -476,6 +478,7 @@ public ShardQueryConfiguration(ShardQueryConfiguration other) { this.setCollapseUids(other.getCollapseUids()); this.setCollapseUidsThreshold(other.getCollapseUidsThreshold()); this.setEnforceUniqueTermsWithinExpressions(other.getEnforceUniqueTermsWithinExpressions()); + this.setPruneQueryByIngestTypes(other.getPruneQueryByIngestTypes()); this.setReduceQueryFields(other.getReduceQueryFields()); this.setReduceTypeMetadata(other.getReduceTypeMetadata()); this.setReduceTypeMetadataPerShard(other.getReduceTypeMetadataPerShard()); @@ -2046,6 +2049,14 @@ public void setEnforceUniqueTermsWithinExpressions(boolean enforceUniqueTermsWit this.enforceUniqueTermsWithinExpressions = enforceUniqueTermsWithinExpressions; } + public boolean getPruneQueryByIngestTypes() { + return pruneQueryByIngestTypes; + } + + public void setPruneQueryByIngestTypes(boolean pruneQueryByIngestTypes) { + this.pruneQueryByIngestTypes = pruneQueryByIngestTypes; + } + public boolean getReduceQueryFields() { return reduceQueryFields; } diff --git a/warehouse/query-core/src/main/java/datawave/query/index/lookup/RangeStream.java b/warehouse/query-core/src/main/java/datawave/query/index/lookup/RangeStream.java index bc72f28dec..254ab45dd8 100644 --- a/warehouse/query-core/src/main/java/datawave/query/index/lookup/RangeStream.java +++ b/warehouse/query-core/src/main/java/datawave/query/index/lookup/RangeStream.java @@ -81,6 +81,8 @@ import datawave.query.jexl.visitors.BaseVisitor; import datawave.query.jexl.visitors.DepthVisitor; import datawave.query.jexl.visitors.EvaluationRendering; +import datawave.query.jexl.visitors.ExecutableDeterminationVisitor; +import datawave.query.jexl.visitors.IngestTypePruningVisitor; import datawave.query.jexl.visitors.JexlStringBuildingVisitor; import datawave.query.jexl.visitors.TreeFlatteningRebuildingVisitor; import datawave.query.planner.QueryPlan; @@ -91,6 
+93,7 @@ import datawave.query.util.QueryScannerHelper; import datawave.query.util.Tuple2; import datawave.query.util.Tuples; +import datawave.query.util.TypeMetadata; import datawave.util.StringUtils; import datawave.util.time.DateHelper; import datawave.webservice.common.logging.ThreadConfigurableLogger; @@ -252,7 +255,7 @@ public Iterator iterator() { } } - this.itr = filter(concat(transform(queryStream, new TupleToRange(queryStream.currentNode(), config))), new EmptyPlanPruner()); + this.itr = filter(concat(transform(queryStream, new TupleToRange(queryStream.currentNode(), config))), getEmptyPlanPruner()); } } finally { // shut down the executor as all threads have completed @@ -261,18 +264,58 @@ public Iterator iterator() { return itr; } + public EmptyPlanPruner getEmptyPlanPruner() { + if (config.getPruneQueryByIngestTypes()) { + try { + return new EmptyPlanPruner(config, metadataHelper, metadataHelper.getTypeMetadata()); + } catch (TableNotFoundException e) { + throw new DatawaveFatalQueryException("Failed to get TypeMetadata", e); + } + } + + return new EmptyPlanPruner(); + } + + /** + * This class will prune a QueryPlan if either A) the ranges are empty or B) optionally, if no document can satisfy the query + */ public static class EmptyPlanPruner implements Predicate { + private ShardQueryConfiguration config; + private MetadataHelper metadataHelper; + private TypeMetadata typeMetadata; + + public EmptyPlanPruner() { + // no-op + } + + public EmptyPlanPruner(ShardQueryConfiguration config, MetadataHelper metadataHelper, TypeMetadata typeMetadata) { + this.config = config; + this.metadataHelper = metadataHelper; + this.typeMetadata = typeMetadata; + } + public boolean apply(QueryPlan plan) { - if (log.isTraceEnabled()) { - if (null != plan.getQueryTree() || (null == plan.getQueryString() || plan.getQueryString().isEmpty())) { - log.trace("Plan is " + JexlStringBuildingVisitor.buildQuery(plan.getQueryTree()) + " " + plan.getRanges() + " " - + plan.getRanges().iterator().hasNext()); - } else { - log.trace("Plan is " + plan.getQueryTree() + " " + plan.getRanges() + " " + plan.getRanges().iterator().hasNext()); + + if (!plan.getRanges().iterator().hasNext()) { + if (log.isTraceEnabled()) { + log.trace("Query plan had no ranges: " + JexlStringBuildingVisitor.buildQueryWithoutParse(plan.getQueryTree())); } + return false; } - return plan.getRanges().iterator().hasNext(); + + if (typeMetadata != null) { + JexlNode node = plan.getQueryTree(); + JexlNode result = IngestTypePruningVisitor.prune(node, typeMetadata); + if (!ExecutableDeterminationVisitor.isExecutable(result, config, metadataHelper)) { + return false; + } + + // update the query tree with the (potentially) pruned + plan.setQuery(JexlStringBuildingVisitor.buildQueryWithoutParse(result), result); + } + + return true; } } diff --git a/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/IngestTypePruningVisitor.java b/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/IngestTypePruningVisitor.java new file mode 100644 index 0000000000..93ad75b575 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/IngestTypePruningVisitor.java @@ -0,0 +1,471 @@ +package datawave.query.jexl.visitors; + +import static datawave.query.jexl.functions.ContentFunctions.CONTENT_FUNCTION_NAMESPACE; +import static datawave.query.jexl.functions.ContentFunctionsDescriptor.ContentJexlArgumentDescriptor; +import static 
datawave.query.jexl.functions.EvaluationPhaseFilterFunctions.EVAL_PHASE_FUNCTION_NAMESPACE; +import static datawave.query.jexl.functions.EvaluationPhaseFilterFunctionsDescriptor.EvaluationPhaseFilterJexlArgumentDescriptor; +import static datawave.query.jexl.functions.GeoWaveFunctionsDescriptor.GeoWaveJexlArgumentDescriptor; +import static datawave.query.jexl.functions.GroupingRequiredFilterFunctions.GROUPING_REQUIRED_FUNCTION_NAMESPACE; +import static datawave.query.jexl.functions.GroupingRequiredFilterFunctionsDescriptor.GroupingRequiredFilterJexlArgumentDescriptor; +import static datawave.query.jexl.functions.QueryFunctionsDescriptor.QueryJexlArgumentDescriptor; + +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +import org.apache.commons.jexl2.parser.ASTAndNode; +import org.apache.commons.jexl2.parser.ASTEQNode; +import org.apache.commons.jexl2.parser.ASTERNode; +import org.apache.commons.jexl2.parser.ASTFunctionNode; +import org.apache.commons.jexl2.parser.ASTGENode; +import org.apache.commons.jexl2.parser.ASTGTNode; +import org.apache.commons.jexl2.parser.ASTLENode; +import org.apache.commons.jexl2.parser.ASTLTNode; +import org.apache.commons.jexl2.parser.ASTNENode; +import org.apache.commons.jexl2.parser.ASTNRNode; +import org.apache.commons.jexl2.parser.ASTNotNode; +import org.apache.commons.jexl2.parser.ASTOrNode; +import org.apache.commons.jexl2.parser.ASTReference; +import org.apache.commons.jexl2.parser.ASTReferenceExpression; +import org.apache.commons.jexl2.parser.JexlNode; +import org.apache.commons.jexl2.parser.JexlNodes; +import org.apache.commons.jexl2.parser.SimpleNode; +import org.apache.log4j.Logger; + +import com.google.common.collect.Sets; + +import datawave.query.jexl.JexlASTHelper; +import datawave.query.jexl.functions.ContentFunctionsDescriptor; +import datawave.query.jexl.functions.EvaluationPhaseFilterFunctionsDescriptor; +import datawave.query.jexl.functions.FunctionJexlNodeVisitor; +import datawave.query.jexl.functions.GeoWaveFunctions; +import datawave.query.jexl.functions.GeoWaveFunctionsDescriptor; +import datawave.query.jexl.functions.GroupingRequiredFilterFunctionsDescriptor; +import datawave.query.jexl.functions.QueryFunctions; +import datawave.query.jexl.functions.QueryFunctionsDescriptor; +import datawave.query.jexl.nodes.ExceededOrThresholdMarkerJexlNode; +import datawave.query.jexl.nodes.QueryPropertyMarker; +import datawave.query.util.TypeMetadata; + +/** + * This visitor addresses the case when multiple ingest types share some but not all fields in a query + *
<p>
+ * Consider the query (A AND B) where term A maps to ingest type 1 and term B maps to ingest type 2. No document will ever satisfy this + * intersection. Thus, this should prune to zero terms. + *
<p>
+ * Consider the query (A AND (B OR C)) where term A and term B map to ingest type 1 and term C maps to ingest type 2. In this case term C should be + * pruned from the nested union leaving the intersection (A AND B) + *
<p>
+ */ +public class IngestTypePruningVisitor extends BaseVisitor { + private static final Logger log = Logger.getLogger(IngestTypePruningVisitor.class); + + private static final String UNKNOWN_TYPE = "UNKNOWN_TYPE"; + + // cache expensive calls to get ingest types per field + private final TypeMetadata typeMetadata; + private final Map> ingestTypeCache; + + private int termsPruned = 0; + private int nodesPruned = 0; + + public IngestTypePruningVisitor(TypeMetadata typeMetadata) { + this.typeMetadata = typeMetadata; + this.ingestTypeCache = new HashMap<>(); + } + + public static JexlNode prune(JexlNode node, TypeMetadata metadataHelper) { + IngestTypePruningVisitor visitor = new IngestTypePruningVisitor(metadataHelper); + node.jjtAccept(visitor, null); + if (visitor.getTermsPruned() > 0) { + log.info("pruned " + visitor.getTermsPruned() + " terms and " + visitor.getNodesPruned() + " nodes"); + } + return node; + } + + // leaf nodes + + @Override + public Object visit(ASTEQNode node, Object data) { + return visitOrPrune(node, data); + } + + @Override + public Object visit(ASTNENode node, Object data) { + return visitOrPrune(node, data); + } + + @Override + public Object visit(ASTLTNode node, Object data) { + return visitOrPrune(node, data); + } + + @Override + public Object visit(ASTGTNode node, Object data) { + return visitOrPrune(node, data); + } + + @Override + public Object visit(ASTLENode node, Object data) { + return visitOrPrune(node, data); + } + + @Override + public Object visit(ASTGENode node, Object data) { + return visitOrPrune(node, data); + } + + @Override + public Object visit(ASTERNode node, Object data) { + return visitOrPrune(node, data); + } + + @Override + public Object visit(ASTNRNode node, Object data) { + return visitOrPrune(node, data); + } + + // junction nodes + + @Override + public Object visit(ASTNotNode node, Object data) { + return visitOrPrune(node, data); + } + + @Override + public Object visit(ASTFunctionNode node, Object data) { + return visitOrPrune(node, data); + } + + @Override + public Object visit(SimpleNode node, Object data) { + return visitOrPrune((JexlNode) node, data); + } + + @Override + public Object visit(ASTReference node, Object data) { + return visitOrPrune(node, data); + } + + @Override + public Object visit(ASTReferenceExpression node, Object data) { + return visitOrPrune(node, data); + } + + @Override + public Object visit(ASTOrNode node, Object data) { + return visitOrPrune(node, data); + } + + /** + * ASTAndNodes is where all the pruning logic is applied + * + * @param node + * an ASTAndNode + * @param data + * some data + * @return a set of ingest types + */ + @Override + @SuppressWarnings("unchecked") + public Object visit(ASTAndNode node, Object data) { + + QueryPropertyMarker.Instance instance = QueryPropertyMarker.findInstance(node); + if (instance.isAnyType()) { + return visitMarker(instance, node, data); + } + + // getting ingest types for an intersection is different + Set ingestTypes = getIngestTypesForIntersection(node); + + // automatically prune if there is no common ingest type + if (ingestTypes.isEmpty()) { + pruneNodeFromParent(node); + return Collections.emptySet(); + } + + // the AndNode is where we can generate a set of ingest types used to prune child nodes + // if the data object passed in is not a set, use the current set of ingest types to prune + Set pruningTypes; + if (data instanceof Set) { + pruningTypes = (Set) data; // using the ingest types passed in + } else { + // prune using the aggregated ingest types + // 
this handles the case of a nested union + pruningTypes = ingestTypes; + } + + for (JexlNode child : JexlNodes.children(node)) { + child.jjtAccept(this, pruningTypes); + } + + if (node.jjtGetNumChildren() == 0) { + pruneNodeFromParent(node); + } + + return ingestTypes; + } + + /** + * Marker node visit is broken out for two primary reasons + *
<p>
+ * First, the source may be an ExceededOrThresholdMarkerJexlNode which requires special handling + *
<p>
+ * Second, the first child is an assignment node so the visitor must recurse through the second child + * + * @param instance + * a QueryPropertyMarker Instance + * @param node + * the QueryPropertyMarker's root node + * @param data + * the data + * @return the set of ingest types associated with this node + */ + @SuppressWarnings("unchecked") + private Set visitMarker(QueryPropertyMarker.Instance instance, JexlNode node, Object data) { + + // ExceededOr marker can be handled on its own + if (instance.isType(ExceededOrThresholdMarkerJexlNode.class)) { + String field = ExceededOrThresholdMarkerJexlNode.getField(instance.getSource()); + Set ingestTypes = getIngestTypesForField(field); + if (data instanceof Set) { + return pruneLeaf(ingestTypes, node, data); + } + return ingestTypes; + } + + JexlNode source = node.jjtGetChild(1); + Set dts = (Set) visit(source, data); + + if (source.jjtGetNumChildren() == 0) { + pruneNodeFromParent(source); + pruneNodeFromParent(node); + } + + return dts; + } + + // pruning methods + + private Set visitOrPrune(JexlNode node, Object data) { + + Set ingestTypes = getIngestTypes(node); + + // check for pruning + if (data instanceof Set) { + ingestTypes = prune(ingestTypes, node, data); + } + + // if all children were pruned, also prune this node + if (node.jjtGetNumChildren() == 0) { + pruneNodeFromParent(node); + } + + return ingestTypes; + } + + private Set prune(Set ingestTypes, JexlNode node, Object data) { + if (isJunction(node)) { + return pruneJunction(node, data); + } else { + return pruneLeaf(ingestTypes, node, data); + } + } + + @SuppressWarnings("unchecked") + private Set pruneLeaf(Set ingestTypes, JexlNode node, Object data) { + boolean prune = shouldPrune(ingestTypes, (Set) data); + + if (prune) { + pruneNodeFromParent(node); + termsPruned++; + } + return Collections.emptySet(); + } + + /** + * Helper method that takes two sets of ingestTypes and determines if the current node can be pruned + * + * @param ingestTypes + * the ingestTypes for the current node + * @param includes + * the ingestTypes used to prune + * @return true if the current node should be pruned + */ + private boolean shouldPrune(Set ingestTypes, Set includes) { + + // if either side has an UNKNOWN_TYPE, do not prune this node + if (ingestTypes.contains(UNKNOWN_TYPE) || includes.contains(UNKNOWN_TYPE)) { + return false; + } + + // prune if there was no overlap + return Sets.intersection(ingestTypes, includes).isEmpty(); + } + + private Set pruneJunction(JexlNode node, Object data) { + for (JexlNode child : JexlNodes.children(node)) { + child.jjtAccept(this, data); + } + return Collections.emptySet(); + } + + /** + * A 'junction' node is not only a union or intersection, it is a non-leaf node. 
+ * + * @param node + * a JexlNode + * @return true if the node is a non-leaf + */ + private boolean isJunction(JexlNode node) { + JexlNode deref = JexlASTHelper.dereference(node); + // @formatter:off + return deref instanceof ASTAndNode || + deref instanceof ASTOrNode || + deref instanceof ASTReference || + deref instanceof ASTReferenceExpression || + deref instanceof ASTNotNode; + // @formatter:on + } + + // get ingest types + + private Set getIngestTypes(JexlNode node) { + if (isJunction(node)) { + return getIngestTypesForJunction(node); + } else { + return getIngestTypesForLeaf(node); + } + } + + @SuppressWarnings("unchecked") + public Set getIngestTypesForJunction(JexlNode node) { + Set ingestTypes = new HashSet<>(); + for (JexlNode child : JexlNodes.children(node)) { + Set found = (Set) child.jjtAccept(this, null); + ingestTypes.addAll(found); + } + return ingestTypes; + } + + /** + * In most cases a leaf will have a single field. In certain cases a function may produce more than one field, and in rare cases one may see leaf nodes like + * FIELD1 == FIELD2 + * + * @param node + * the leaf node + * @return a set of ingestTypes + */ + public Set getIngestTypesForLeaf(JexlNode node) { + Set ingestTypes = new HashSet<>(); + Set fields = getFieldsForLeaf(node); + for (String field : fields) { + ingestTypes.addAll(getIngestTypesForField(field)); + } + if (fields.isEmpty()) { + // could have nodes like arithmetic + ingestTypes.add(UNKNOWN_TYPE); + } + return ingestTypes; + } + + /** + * Get fields for a leaf node + * + * @param node + * a leaf node + * @return a set of ingest types + */ + public Set getFieldsForLeaf(JexlNode node) { + JexlNode deref = JexlASTHelper.dereference(node); + if (deref instanceof ASTFunctionNode) { + return getFieldsForFunctionNode((ASTFunctionNode) deref); + } + + // @formatter:off + return JexlASTHelper.getIdentifierNames(deref) + .stream() + .map(JexlASTHelper::deconstructIdentifier) + .collect(Collectors.toSet()); + // @formatter:on + } + + private Set getFieldsForFunctionNode(ASTFunctionNode node) { + FunctionJexlNodeVisitor visitor = FunctionJexlNodeVisitor.eval(node); + switch (visitor.namespace()) { + case CONTENT_FUNCTION_NAMESPACE: + // all content function fields are added + ContentJexlArgumentDescriptor contentDescriptor = new ContentFunctionsDescriptor().getArgumentDescriptor(node); + return contentDescriptor.fieldsAndTerms(Collections.emptySet(), Collections.emptySet(), Collections.emptySet(), null)[0]; + case EVAL_PHASE_FUNCTION_NAMESPACE: + // might be able to exclude certain evaluation phase functions from this step + EvaluationPhaseFilterJexlArgumentDescriptor evaluationDescriptor = (EvaluationPhaseFilterJexlArgumentDescriptor) new EvaluationPhaseFilterFunctionsDescriptor() + .getArgumentDescriptor(node); + return evaluationDescriptor.fields(null, Collections.emptySet()); + case GeoWaveFunctions.GEOWAVE_FUNCTION_NAMESPACE: + GeoWaveJexlArgumentDescriptor descriptor = (GeoWaveJexlArgumentDescriptor) new GeoWaveFunctionsDescriptor().getArgumentDescriptor(node); + return descriptor.fields(null, Collections.emptySet()); + case GROUPING_REQUIRED_FUNCTION_NAMESPACE: + GroupingRequiredFilterJexlArgumentDescriptor groupingDescriptor = (GroupingRequiredFilterJexlArgumentDescriptor) new GroupingRequiredFilterFunctionsDescriptor() + .getArgumentDescriptor(node); + return groupingDescriptor.fields(null, Collections.emptySet()); + case QueryFunctions.QUERY_FUNCTION_NAMESPACE: + QueryJexlArgumentDescriptor queryDescriptor = (QueryJexlArgumentDescriptor) new 
QueryFunctionsDescriptor().getArgumentDescriptor(node); + return queryDescriptor.fields(null, Collections.emptySet()); + default: + // do nothing + log.warn("Unhandled function namespace: " + visitor.namespace()); + return Collections.emptySet(); + } + } + + public Set getIngestTypesForField(String field) { + if (!ingestTypeCache.containsKey(field)) { + Set types = typeMetadata.getDataTypesForField(field); + if (types.isEmpty()) { + types.add(UNKNOWN_TYPE); + } + ingestTypeCache.put(field, types); + } + return ingestTypeCache.get(field); + } + + @SuppressWarnings("unchecked") + private Set getIngestTypesForIntersection(ASTAndNode node) { + Set ingestTypes = new HashSet<>(); + for (JexlNode child : JexlNodes.children(node)) { + Set childIngestTypes = (Set) child.jjtAccept(this, null); + + ingestTypes = ingestTypes.isEmpty() ? childIngestTypes : intersectTypes(ingestTypes, childIngestTypes); + + if (ingestTypes.isEmpty()) { + // short circuit. no need to continue traversing the intersection. + break; + } + } + return ingestTypes; + } + + private Set intersectTypes(Set typesA, Set typesB) { + if (typesA.contains(UNKNOWN_TYPE) || typesB.contains(UNKNOWN_TYPE)) { + return Collections.singleton(UNKNOWN_TYPE); + } + return Sets.intersection(typesA, typesB); + } + + private void pruneNodeFromParent(JexlNode node) { + JexlNodes.removeFromParent(node.jjtGetParent(), node); + nodesPruned++; + } + + public int getTermsPruned() { + return termsPruned; + } + + public int getNodesPruned() { + return nodesPruned; + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java b/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java index b7b30a9d1d..a37e1a46f2 100644 --- a/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java +++ b/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java @@ -111,6 +111,7 @@ import datawave.query.jexl.visitors.FixUnindexedNumericTerms; import datawave.query.jexl.visitors.FunctionIndexQueryExpansionVisitor; import datawave.query.jexl.visitors.GeoWavePruningVisitor; +import datawave.query.jexl.visitors.IngestTypePruningVisitor; import datawave.query.jexl.visitors.InvertNodeVisitor; import datawave.query.jexl.visitors.IsNotNullIntentVisitor; import datawave.query.jexl.visitors.IsNotNullPruningVisitor; @@ -127,6 +128,7 @@ import datawave.query.jexl.visitors.QueryOptionsFromQueryVisitor; import datawave.query.jexl.visitors.QueryPropertyMarkerSourceConsolidator; import datawave.query.jexl.visitors.QueryPruningVisitor; +import datawave.query.jexl.visitors.RebuildingVisitor; import datawave.query.jexl.visitors.RegexFunctionVisitor; import datawave.query.jexl.visitors.RegexIndexExpansionVisitor; import datawave.query.jexl.visitors.RewriteNegationsVisitor; @@ -2536,6 +2538,17 @@ public Tuple2,Boolean> getQueryRanges(ScannerFactor fullTableScanReason = state.reason; } + if (config.getPruneQueryByIngestTypes()) { + JexlNode pruned = IngestTypePruningVisitor.prune(RebuildingVisitor.copy(queryTree), getTypeMetadata()); + if (config.getFullTableScanEnabled() || ExecutableDeterminationVisitor.isExecutable(pruned, config, metadataHelper)) { + // always update the query for full table scans or in cases where the query is still executable + queryTree = pruned; + config.setQueryTree((ASTJexlScript) pruned); + } else { + throw new DatawaveFatalQueryException("Check query for mutually exclusive ingest types, query was non-executable after pruning by ingest type"); + } + } + 
// if a simple examination of the query has not forced a full table // scan, then lets try to compute ranges if (!needsFullTable) { @@ -2606,6 +2619,14 @@ public Tuple2,Boolean> getQueryRanges(ScannerFactor return new Tuple2<>(ranges, needsFullTable); } + private TypeMetadata getTypeMetadata() { + try { + return metadataHelper.getTypeMetadata(); + } catch (TableNotFoundException e) { + throw new DatawaveFatalQueryException("Could not get TypeMetadata"); + } + } + /** * Initializes the range stream, whether it is configured to be a different class than the Default Range stream or not. * diff --git a/warehouse/query-core/src/main/java/datawave/query/tables/ShardQueryLogic.java b/warehouse/query-core/src/main/java/datawave/query/tables/ShardQueryLogic.java index d0836c9743..5375ffafad 100644 --- a/warehouse/query-core/src/main/java/datawave/query/tables/ShardQueryLogic.java +++ b/warehouse/query-core/src/main/java/datawave/query/tables/ShardQueryLogic.java @@ -2233,6 +2233,14 @@ public void setEnforceUniqueTermsWithinExpressions(boolean enforceUniqueTermsWit this.getConfig().setEnforceUniqueTermsWithinExpressions(enforceUniqueTermsWithinExpressions); } + public boolean getPruneQueryByIngestTypes() { + return getConfig().getPruneQueryByIngestTypes(); + } + + public void setPruneQueryByIngestTypes(boolean pruneQueryByIngestTypes) { + getConfig().setPruneQueryByIngestTypes(pruneQueryByIngestTypes); + } + public boolean getReduceQueryFields() { return this.getConfig().getReduceQueryFields(); } diff --git a/warehouse/query-core/src/test/java/datawave/query/config/ShardQueryConfigurationTest.java b/warehouse/query-core/src/test/java/datawave/query/config/ShardQueryConfigurationTest.java index 7711eabfa7..cf4206c22e 100644 --- a/warehouse/query-core/src/test/java/datawave/query/config/ShardQueryConfigurationTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/config/ShardQueryConfigurationTest.java @@ -440,6 +440,8 @@ public void setUp() throws Exception { updatedValues.put("tfAggregationThresholdMs", 10000); defaultValues.put("pruneQueryOptions", false); updatedValues.put("pruneQueryOptions", true); + defaultValues.put("pruneQueryByIngestTypes", false); + updatedValues.put("pruneQueryByIngestTypes", true); defaultValues.put("numIndexLookupThreads", 8); updatedValues.put("numIndexLookupThreads", 18); defaultValues.put("expansionLimitedToModelContents", false); diff --git a/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/IngestTypePruningVisitorTest.java b/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/IngestTypePruningVisitorTest.java new file mode 100644 index 0000000000..1ea887e688 --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/IngestTypePruningVisitorTest.java @@ -0,0 +1,491 @@ +package datawave.query.jexl.visitors; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; + +import org.apache.commons.jexl2.parser.ASTJexlScript; +import org.apache.log4j.Logger; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +import datawave.data.type.LcType; +import datawave.query.jexl.JexlASTHelper; +import datawave.query.jexl.visitors.validate.ASTValidator; +import datawave.query.util.TypeMetadata; + +public class IngestTypePruningVisitorTest { + + private static final Logger log = Logger.getLogger(IngestTypePruningVisitorTest.class); + + private static final TypeMetadata 
typeMetadata = new TypeMetadata(); + private final ASTValidator validator = new ASTValidator(); + + @BeforeAll + public static void setup() { + typeMetadata.put("A", "ingestType1", LcType.class.getTypeName()); + typeMetadata.put("A", "ingestType2", LcType.class.getTypeName()); + typeMetadata.put("A", "ingestType3", LcType.class.getTypeName()); + + typeMetadata.put("B", "ingestType1", LcType.class.getTypeName()); + typeMetadata.put("B", "ingestType2", LcType.class.getTypeName()); + + typeMetadata.put("C", "ingestType5", LcType.class.getTypeName()); + + typeMetadata.put("123", "ingestType1", LcType.class.getTypeName()); + } + + @Test + void testNoOps() { + // @formatter:off + String[] queries = { + "A == '1' || B == '2'", + "A == '1' && B == '2'" + }; + // @formatter:on + + for (String query : queries) { + test(query, query); + } + } + + // test cases for no pruning, multiple node types + @Test + void testNoOpsWithMultipleLeafTypes() { + // @formatter:off + String[] queries = { + "A == '1' && B == '2'", + "A == '1' && B != '2'", + "A == '1' && !(B == '2')", + "A == '1' && B =~ '2'", + "A == '1' && B !~ '2'", + "A == '1' && !(B =~ '2')", + "A == '1' && B < '2'", + "A == '1' && B <= '2'", + "A == '1' && B > '2'", + "A == '1' && B >= '2'", + }; + // @formatter:on + + for (String query : queries) { + test(query, query); + } + } + + // case where two nodes do not share an ingest type + @Test + void testEmptyIntersection() { + // @formatter:off + String[] queries = { + "A == '1' && C == '3'", + "A == '1' && B == '2' && C == '3'", + "A == '1' && C != '3'", + "A == '1' && !(C == '3')", + "A == '1' && C =~ '3'", + "A == '1' && C !~ '3'", + "A == '1' && !(C =~ '3')", + "A == '1' && C < '3'", + "A == '1' && C <= '3'", + "A == '1' && C > '3'", + "A == '1' && C >= '3'", + }; + // @formatter:on + + for (String query : queries) { + test(query, null); + } + } + + // A && (B || C) + // ingestType 1 = A, B + // ingestType 2 = C + @Test + void testPruneNestedUnion() { + // prune C term + String query = "A == '1' && (B == '2' || C == '3')"; + String expected = "A == '1' && B == '2'"; + test(query, expected); + + // prune multiple C terms + query = "A == '1' && (B == '2' || C == '3' || C == '4')"; + expected = "A == '1' && B == '2'"; + test(query, expected); + + // whole union pruned, which leads to whole query getting pruned + query = "A == '1' && (C == '3' || C == '4')"; + test(query, null); + } + + // A && (B || C) + // ingestType 1 = A, B + // ingestType 2 = C + @Test + void testPruneComplexNestedUnion() { + // double nested C term pruned + String query = "A == '1' && (B == '2' || (C == '3' && C == '5'))"; + String expected = "A == '1' && B == '2'"; + test(query, expected); + + // double nested C term pruned, nested union persists + query = "A == '1' && (B == '2' || B == '0' || (C == '3' && C == '5'))"; + expected = "A == '1' && (B == '2' || B == '0')"; + test(query, expected); + + // double nested intersection of A and C pruned, nested union persists + query = "A == '1' && (B == '2' || B == '0' || (C == '3' && A == '15'))"; + expected = "A == '1' && (B == '2' || B == '0')"; + test(query, expected); + } + + @Test + void testOtherComplexNestedUnion() { + // doesn't matter how complex the nesting is, C term should drive pruning + String query = "C == '1' && (B == '2' || B == '3' || (A == '4' && A == '5'))"; + test(query, null); + } + + @Test + void testDoubleNestedPruning() { + // base case, should be fine + String query = "(A == '1' || B == '2') && (A == '3' || B == '4')"; + test(query, query); + + // no 
intersection of types + query = "(A == '1' || B == '2') && (C == '3' || C == '4')"; + test(query, null); + + // no intersection of types + query = "(C == '1' || C == '2') && (A == '3' || B == '4')"; + test(query, null); + } + + @Test + void testDoubleNestedUnionWithRangeStreamPruning() { + // this case demonstrates how a top level query could pass ingest type pruning + // but still get modified by range stream pruning. In some cases further pruning + // by this visitor would be necessary. + + // query passes ingest type pruning without issue + String query = "(A == '1' || C == '2') && (B == '3' || C == '4')"; + test(query, query); + + // A term pruned by range stream, B term has no effect on resulting query + query = "C == '2' && (B == '3' || C == '4')"; + test(query, "C == '2' && C == '4'"); + + // B term pruned by range stream, C term has no effect on resulting query + query = "(A == '1' || C == '2') && B == '3'"; + test(query, "A == '1' && B == '3'"); + + // left C term pruned by range stream, right C term has no effect on resulting query + query = "A == '1' && (B == '3' || C == '4')"; + test(query, "A == '1' && B == '3'"); + + // right C term pruned by range stream, left C term has no effect on resulting query + query = "(A == '1' || C == '2') && B == '3'"; + test(query, "A == '1' && B == '3'"); + + // left union pruned by range stream, no pruning to do in resulting query + query = "B == '3' || C == '4'"; + test(query, query); + + // right union pruned by range stream, no pruning to do in resulting query + query = "A == '1' || C == '2'"; + test(query, query); + } + + @Test + void testOverlappingExclusions() { + TypeMetadata metadata = new TypeMetadata(); + metadata.put("A", "ingestType1", LcType.class.getTypeName()); + metadata.put("A", "ingestType2", LcType.class.getTypeName()); + metadata.put("B", "ingestType2", LcType.class.getTypeName()); + metadata.put("B", "ingestType3", LcType.class.getTypeName()); + metadata.put("C", "ingestType3", LcType.class.getTypeName()); + metadata.put("C", "ingestType4", LcType.class.getTypeName()); + metadata.put("D", "ingestType4", LcType.class.getTypeName()); + metadata.put("D", "ingestType5", LcType.class.getTypeName()); + + // A && B prune to ingestType 2 + // C && D prune to ingestType 4 + // top level B term intersects with union of ingest types 2, 4 producing a singleton of ingestType 2 + // range stream pruning means we could still end up with a non-viable query + // if the A term is not found + String query = "B == '22' && ((A == '1' && B == '2') || (C == '3' && D == '4'))"; + String expected = "B == '22' && (A == '1' && B == '2')"; + test(query, expected, metadata); + } + + @Test + void testYetAnotherComplexNestedUnion() { + TypeMetadata metadata = new TypeMetadata(); + metadata.put("A", "ingestType1", LcType.class.getTypeName()); + metadata.put("B", "ingestType1", LcType.class.getTypeName()); + metadata.put("C", "ingestType2", LcType.class.getTypeName()); + metadata.put("D", "ingestType2", LcType.class.getTypeName()); + + // need the complex case when a top level intersection drives the pruning of a nested union-intersection + // A && (B || (C && D)) + // A = 1 + // B = 1 + // C = 2 + // D = 2 + + String query = "A == '1' && (B == '2' || (C == '3' && D == '4'))"; + String expected = "A == '1' && B == '2'"; + test(query, expected, metadata); + + // same datatypes, drop the single union term + query = "A == '1' && (C == '5' || (A == '2' && B == '3'))"; + expected = "A == '1' && A == '2' && B == '3'"; + test(query, expected, metadata); + + query 
= "C == '1' && (A == '2' || (B == '3' && C == '4'))"; + test(query, null); + } + + @Test + void testIntersectionsWithNonIndexedFields() { + // @formatter:off + String[] queries = { + // D term is not indexed + "A == '1' && D == '3'", + "A == '1' && B == '2' && D == '3'", + "A == '1' && D != '3'", + "A == '1' && !(D == '3')", + "A == '1' && D =~ '3'", + "A == '1' && D !~ '3'", + "A == '1' && !(D =~ '3')" + }; + // @formatter:on + + for (String query : queries) { + test(query, query); + } + } + + @Test + void testIntersectionsWithIncompleteUnions() { + // @formatter:off + String[] queries = { + "A == '1' && (B == 2 || filter:includeRegex(D, 'value.*'))", + "A == '1' && (B == 2 || filter:excludeRegex(D, 'value.*'))", + }; + // @formatter:on + + for (String query : queries) { + test(query, query); + } + } + + @Test + void testIntersectionsWithQueryFunctions() { + // each function type + + // @formatter:off + String[] queries = { + "A == '1' && f:between(B, a, b)", + "A == '1' && f:length(B, '2', '3')", + // by the time the ingestType pruning visitor is run, a multi-fielded + // include function should be decomposed into discrete functions + "A == '1' && f:includeText(B, 'ba.*')", + "A == '1' && f:matchRegex(B, 'ba.*')", + "A == '1' && f:matchRegex(B, C, 'ba.*')", + }; + // @formatter:on + + // no change for these queries + for (String query : queries) { + test(query, query); + } + } + + @Test + void testIntersectionsWithMarkers() { + // all marker node types + // @formatter:off + String[] queries = { + "A == '1' && ((_Bounded_ = true) && (B >= '0' && B <= '10'))", + "A == '1' && ((_Delayed_ = true) && (B == '2'))", + "A == '1' && ((_Delayed_ = true) && (A == '1' || B == '2'))", + "A == '1' && ((_Delayed_ = true) && (A == '1' && B == '2'))", + "A == '1' && ((_Eval_ = true) && (B == '2'))", + "A == '1' && ((_List_ = true) && ((id = 'some-bogus-id') && (field = 'B') && (params = '{\"values\":[\"a\",\"b\",\"c\"]}')))", + "A == '1' && ((_Term_ = true) && (B == '2'))", + "A == '1' && ((_Value_ = true) && (B =~ 'ba.*'))", + "A == '1' && ((_Value_ = true) && (A =~ 'ab.*' || B =~ 'ba.*'))", + "A == '1' && ((_Value_ = true) && (A =~ 'ab.*' && B =~ 'ba.*'))" + }; + // @formatter:on + + for (String query : queries) { + test(query, query); + } + + // same queries as above, test pruning + // @formatter:off + queries = new String[] { + "A == '1' && ((_Bounded_ = true) && (C >= '0' && C <= '10'))", + "A == '1' && ((_Delayed_ = true) && (C == '2'))", + "A == '1' && ((_Eval_ = true) && (C == '2'))", + "A == '1' && ((_List_ = true) && ((id = 'some-bogus-id') && (field = 'C') && (params = '{\"values\":[\"a\",\"b\",\"c\"]}')))", + "A == '1' && ((_Term_ = true) && (C == '2'))", + "A == '1' && ((_Value_ = true) && (C =~ 'ba.*'))" + }; + // @formatter:on + + for (String query : queries) { + test(query, null); + } + } + + @Test + void testMultiFieldedMarkers() { + // case 1: delayed intersection of non-intersecting ingestTypes should remove itself + String query = "((_Delayed_ = true) && (A == '1' && C == '2'))"; + test(query, null); + + // case 2: overlapping ingestTypes + query = "A == '1' && ((_Delayed_ = true) && (B == '1' || C == '2'))"; + test(query, "A == '1' && ((_Delayed_ = true) && (B == '1'))"); + + // case 3: non-intersecting ingestTypes (function removes itself) + query = "A == '1' && ((_Delayed_ = true) && (A == '1' && C == '2'))"; + test(query, null); + + // case 4: unknown field and how that works + query = "((_Delayed_ = true) && (A == '1' && D == '2'))"; + test(query, query); + } + + @Test + void 
testDelayedBoundedMarker() { + String query = "((_Delayed_ = true) && ((_Bounded_ = true) && (A > '2' && A < '4')))"; + test(query, query); + + // C term drives pruning of double nested marker + query = "C == '1' && ((_Delayed_ = true) && ((_Bounded_ = true) && (A > '2' && A < '4')))"; + test(query, null); + + query = "((_Delayed_ = true) && ((_Bounded_ = true) && (A > '2' && A < '4'))) && C == '1'"; + test(query, null); + } + + @Test + void testDelayedEvaluationOnlyMarker() { + String query = "((_Delayed_ = true) && ((_Eval_ = true) && (A == '1')))"; + test(query, query); + + // C term drives pruning of double nested marker + query = "C == '1' && ((_Delayed_ = true) && ((_Eval_ = true) && (A == '1')))"; + test(query, null); + } + + @Test + void testDelayedListMarker() { + String query = "((_Delayed_ = true) && ((_List_ = true) && ((id = 'some-bogus-id') && (field = 'A') && (params = '{\"values\":[\"a\",\"b\",\"c\"]}'))))"; + test(query, query); + + // C term drives pruning of double nested marker + query = "C == '1' && ((_Delayed_ = true) && ((_List_ = true) && ((id = 'some-bogus-id') && (field = 'A') && (params = '{\"values\":[\"a\",\"b\",\"c\"]}'))))"; + test(query, null); + } + + @Test + void testDelayedTermMarker() { + String query = "((_Delayed_ = true) && ((_Term_ = true) && (A =~ 'ba.*')))"; + test(query, query); + + // C term drives pruning of double nested marker + query = "C == '1' && ((_Delayed_ = true) && ((_Term_ = true) && (A =~ 'ba.*')))"; + test(query, null); + } + + @Test + void testDelayedValueMarker() { + String query = "((_Delayed_ = true) && ((_Value_ = true) && (A =~ 'ba.*' && B =~ 'ba.*')))"; + test(query, query); + + // C term drives pruning of double nested markers + query = "C == '1' && ((_Delayed_ = true) && ((_Value_ = true) && (A =~ 'ba.*' && B =~ 'ba.*')))"; + test(query, null); + + // root marker with multiple conflicting sources should self-prune + query = "((_Delayed_ = true) && ((_Value_ = true) && (A =~ 'ba.*' && C =~ 'ba.*')))"; + test(query, null); + } + + @Test + void testMultiFieldedFunctions() { + String query = "A == '1' && filter:compare(A,'==','ANY','C')"; + test(query, query); + } + + @Test + void testEvaluationOnlyField() { + // evaluation only fields are not guaranteed to have an 'e' column in + // the datawave metadata table. In this case the Z term has no entry. 
+ String query = "A == '1' && Z == '2'"; + test(query, query); + } + + @Test + void testPruneNegation() { + String query = "A == '1' || !((_Delayed_ = true) && (A == '1' && C == '2'))"; + test(query, "A == '1'"); + } + + @Test + void testFullyPrunedTree() { + String query = "(false)"; + test(query, query); + } + + @Test + void testIdentifiers() { + String query = "A == '1' && $123 == '123'"; + test(query, query); + + query = "C == '1' && $123 == '123'"; + test(query, null); + } + + @Test + void testArithmetic() { + String query = "A == '1' && 1 + 1 == 3"; + test(query, query); + } + + private void test(String query, String expected) { + test(query, expected, typeMetadata); + } + + private void test(String query, String expected, TypeMetadata metadata) { + try { + ASTJexlScript script = JexlASTHelper.parseAndFlattenJexlQuery(query); + ASTJexlScript pruned = (ASTJexlScript) IngestTypePruningVisitor.prune(script, metadata); + + log.info("input : " + query); + log.info("output : " + JexlStringBuildingVisitor.buildQuery(pruned)); + log.info("expected: " + expected); + + // all pruned scripts must be valid + assertTrue(validator.isValid(pruned)); + + // we might be expecting nothing as a result + if (expected == null) { + log.trace("expected null! " + JexlStringBuildingVisitor.buildQuery(pruned)); + assertEquals(0, pruned.jjtGetNumChildren()); + return; + } + + ASTJexlScript expectedScript = JexlASTHelper.parseAndFlattenJexlQuery(expected); + TreeEqualityVisitor.Comparison comparison = TreeEqualityVisitor.checkEquality(expectedScript, pruned); + assertTrue(comparison.isEqual(), "Jexl tree comparison failed with reason: " + comparison.getReason()); + + } catch (Exception e) { + e.printStackTrace(); + fail("test failed: " + e.getMessage()); + } + } +} diff --git a/warehouse/query-core/src/test/resources/datawave/query/QueryLogicFactory.xml b/warehouse/query-core/src/test/resources/datawave/query/QueryLogicFactory.xml index 474c17aa34..5699878175 100644 --- a/warehouse/query-core/src/test/resources/datawave/query/QueryLogicFactory.xml +++ b/warehouse/query-core/src/test/resources/datawave/query/QueryLogicFactory.xml @@ -251,6 +251,8 @@ + + From 75a90c57b48fb01b8c154a9904ef2a8479ac6666 Mon Sep 17 00:00:00 2001 From: Ivan Bella Date: Tue, 17 Oct 2023 08:42:29 -0400 Subject: [PATCH 13/32] Adding a short lived cache around the remote user operations (#2128) --- .../core/src/main/resources/CacheContext.xml | 4 ++-- .../remote/RemoteUserOperationsImpl.java | 19 ++++++++++--------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/warehouse/core/src/main/resources/CacheContext.xml b/warehouse/core/src/main/resources/CacheContext.xml index a8ae9dc4c8..90b996a9b0 100644 --- a/warehouse/core/src/main/resources/CacheContext.xml +++ b/warehouse/core/src/main/resources/CacheContext.xml @@ -21,8 +21,8 @@ - - + + diff --git a/web-services/security/src/main/java/datawave/security/authorization/remote/RemoteUserOperationsImpl.java b/web-services/security/src/main/java/datawave/security/authorization/remote/RemoteUserOperationsImpl.java index e7a497c294..b11ceb41c1 100644 --- a/web-services/security/src/main/java/datawave/security/authorization/remote/RemoteUserOperationsImpl.java +++ b/web-services/security/src/main/java/datawave/security/authorization/remote/RemoteUserOperationsImpl.java @@ -50,10 +50,18 @@ public void init() { } @Override - @Cacheable(value = "listEffectiveAuthorizations", key = "{#callerObject}", cacheManager = "remoteUserOperationsCacheManager") + @Cacheable(value = 
"getRemoteUser", key = "{#principal}", cacheManager = "remoteOperationsCacheManager") + public DatawavePrincipal getRemoteUser(DatawavePrincipal principal) throws AuthorizationException { + log.info("Cache fault: Retrieving user for " + principal.getPrimaryUser().getDn()); + return UserOperations.super.getRemoteUser(principal); + } + + @Override + @Cacheable(value = "listEffectiveAuthorizations", key = "{#callerObject}", cacheManager = "remoteOperationsCacheManager") public AuthorizationsListBase listEffectiveAuthorizations(Object callerObject) throws AuthorizationException { init(); final DatawavePrincipal principal = getDatawavePrincipal(callerObject); + log.info("Cache fault: Retrieving effective auths for " + principal.getPrimaryUser().getDn()); final String suffix = LIST_EFFECTIVE_AUTHS; // includeRemoteServices=false to avoid any loops return executeGetMethodWithRuntimeException(suffix, uriBuilder -> { @@ -83,14 +91,7 @@ public GenericResponse flushCachedCredentials(Object callerObject) throw return readResponse(entity, genericResponseReader); }, () -> suffix); } - - @Override - @Cacheable(value = "remoteUser", key = "{#principal}", cacheManager = "remoteUserOperationsCacheManager") - public DatawavePrincipal getRemoteUser(DatawavePrincipal principal) throws AuthorizationException { - log.info("Cache fault: Retrieving user for " + principal.getPrimaryUser().getDn()); - return UserOperations.super.getRemoteUser(principal); - } - + private DatawavePrincipal getDatawavePrincipal(Object callerObject) { if (callerObject instanceof DatawavePrincipal) { return (DatawavePrincipal) callerObject; From 3dea14c0d30c880ba1e7a60a4ef85689c5b74824 Mon Sep 17 00:00:00 2001 From: Ivan Bella Date: Fri, 27 Oct 2023 07:12:41 -0400 Subject: [PATCH 14/32] Fixing the composite query logic to handle long running queries (#2147) * Fixing the composite query logic to handle long running queries * Do not pass EmptyObjectExceptions all the way through the composite query logic This was resulting in was too many empty pages * Updated to rely on RunningQuery to handle intermediate results for long running queries in the CompositeQueryLogic --- .../query/transformer/GroupingTransform.java | 2 +- .../query/logic/composite/CompositeQueryLogic.java | 14 +++++++------- .../logic/composite/CompositeQueryLogicTest.java | 14 +++++++------- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/warehouse/query-core/src/main/java/datawave/query/transformer/GroupingTransform.java b/warehouse/query-core/src/main/java/datawave/query/transformer/GroupingTransform.java index 8327188210..52ad22b4fe 100644 --- a/warehouse/query-core/src/main/java/datawave/query/transformer/GroupingTransform.java +++ b/warehouse/query-core/src/main/java/datawave/query/transformer/GroupingTransform.java @@ -113,7 +113,7 @@ public void setQueryExecutionForPageStartTime(long queryExecutionForPageStartTim log.debug("setting query execution page start time to {}", queryExecutionForPageStartTime); super.setQueryExecutionForPageStartTime(queryExecutionForPageStartTime); } - + @Override public Entry flush() { Document document = null; diff --git a/web-services/query/src/main/java/datawave/webservice/query/logic/composite/CompositeQueryLogic.java b/web-services/query/src/main/java/datawave/webservice/query/logic/composite/CompositeQueryLogic.java index f689fa7a80..af6e8e57a7 100644 --- a/web-services/query/src/main/java/datawave/webservice/query/logic/composite/CompositeQueryLogic.java +++ 
b/web-services/query/src/main/java/datawave/webservice/query/logic/composite/CompositeQueryLogic.java @@ -121,10 +121,10 @@ public void run() { startLatch.countDown(); started = true; } - + // ensure we start with a reasonable page time resetPageProcessingStartTime(); - + // the results queue is also an exception handler setUncaughtExceptionHandler(results); boolean success = false; @@ -138,7 +138,7 @@ public void run() { last = transformIterator.next(); if (null != last) { log.debug(Thread.currentThread().getName() + ": Got result"); - + // special logic to deal with intermediate results if (last instanceof EventBase && ((EventBase) last).isIntermediateResult()) { resetPageProcessingStartTime(); @@ -148,7 +148,7 @@ public void run() { last = null; } } - + if (last != null) { results.add(last); resultCount++; @@ -178,7 +178,7 @@ public void run() { log.trace("Finished thread: " + this.getName() + " with success = " + success); } } - + public void resetPageProcessingStartTime() { logic.setPageProcessingStartTime(System.currentTimeMillis()); } @@ -634,7 +634,7 @@ public void setPageProcessingStartTime(long pageProcessingStartTime) { logic.setPageProcessingStartTime(pageProcessingStartTime); } } - + @Override public boolean isLongRunningQuery() { for (QueryLogic l : getQueryLogics().values()) { @@ -644,7 +644,7 @@ public boolean isLongRunningQuery() { } return false; } - + public boolean isAllMustInitialize() { return getConfig().isAllMustInitialize(); } diff --git a/web-services/query/src/test/java/datawave/webservice/query/logic/composite/CompositeQueryLogicTest.java b/web-services/query/src/test/java/datawave/webservice/query/logic/composite/CompositeQueryLogicTest.java index cdc7b152b4..eee4f84ee5 100644 --- a/web-services/query/src/test/java/datawave/webservice/query/logic/composite/CompositeQueryLogicTest.java +++ b/web-services/query/src/test/java/datawave/webservice/query/logic/composite/CompositeQueryLogicTest.java @@ -397,7 +397,7 @@ public QueryLogicTransformer getTransformer(Query settings) { public GenericQueryConfiguration initialize(AccumuloClient client, Query settings, Set runtimeQueryAuthorizations) throws Exception { return new TestQueryConfiguration(); } - + @Override public boolean isLongRunningQuery() { return true; @@ -1438,20 +1438,20 @@ public void testIsLongRunningQuery() throws Exception { TestQueryLogic logic2 = new TestQueryLogic(); logics.put("TestQueryLogic", logic1); logics.put("TestQueryLogic2", logic2); - + CompositeQueryLogic c = new CompositeQueryLogic(); c.setQueryLogics(logics); - + Assert.assertFalse(c.isLongRunningQuery()); - + TestQueryLogic2 logic3 = new TestQueryLogic2(); logics.put("TestQueryLogic3", logic3); - + c.setQueryLogics(logics); - + Assert.assertTrue(c.isLongRunningQuery()); } - + @Test public void testAuthorizationsUpdate() throws Exception { Map> logics = new HashMap<>(); From b5ee0de6f280142a26eddda77de0ac061b2e681e Mon Sep 17 00:00:00 2001 From: Ivan Bella Date: Tue, 14 Nov 2023 15:42:01 -0500 Subject: [PATCH 15/32] re #2162: Updated the composite query to be lenient when gathering (#2163) * re #2162: Updated the composite query to be lenient when gathering effective authorizations in shortCircuitExecution mode * re #2162: Catch all exceptions and also handle the getRemoteUser call --- .../logic/composite/CompositeQueryLogic.java | 2 +- .../composite/CompositeUserOperations.java | 40 ++++++++++++++----- 2 files changed, 30 insertions(+), 12 deletions(-) diff --git 
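The run() changes in CompositeQueryLogic above hinge on recognizing intermediate results from long-running queries and resetting the page timer instead of emitting them. A rough, self-contained sketch of that pattern follows; the types and names are simplified stand-ins, not the real DataWave classes.

```java
// Simplified sketch of the intermediate-result handling pattern; Result and the results
// queue are hypothetical stand-ins for EventBase and the composite results queue.
import java.util.Iterator;
import java.util.Queue;
import java.util.concurrent.ConcurrentLinkedQueue;

class IntermediateResultSketch {

    interface Result {
        boolean isIntermediate();
    }

    private final Queue<Result> results = new ConcurrentLinkedQueue<>();
    private long pageStartTime = System.currentTimeMillis();

    void drain(Iterator<Result> source) {
        while (source.hasNext()) {
            Result next = source.next();
            if (next == null) {
                continue;
            }
            if (next.isIntermediate()) {
                // A long-running query produced a keep-alive marker: restart the page clock
                // and drop the marker instead of passing an empty result back to the caller.
                pageStartTime = System.currentTimeMillis();
                continue;
            }
            results.add(next);
        }
    }
}
```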
a/web-services/query/src/main/java/datawave/webservice/query/logic/composite/CompositeQueryLogic.java b/web-services/query/src/main/java/datawave/webservice/query/logic/composite/CompositeQueryLogic.java index af6e8e57a7..06aa1a01ef 100644 --- a/web-services/query/src/main/java/datawave/webservice/query/logic/composite/CompositeQueryLogic.java +++ b/web-services/query/src/main/java/datawave/webservice/query/logic/composite/CompositeQueryLogic.java @@ -527,7 +527,7 @@ public UserOperations getUserOperations() { } } if (!userOperations.isEmpty()) { - return new CompositeUserOperations(userOperations, includeLocal, responseObjectFactory); + return new CompositeUserOperations(userOperations, includeLocal, isShortCircuitExecution(), responseObjectFactory); } return null; } diff --git a/web-services/query/src/main/java/datawave/webservice/query/logic/composite/CompositeUserOperations.java b/web-services/query/src/main/java/datawave/webservice/query/logic/composite/CompositeUserOperations.java index 68b7603737..cd2b4880b9 100644 --- a/web-services/query/src/main/java/datawave/webservice/query/logic/composite/CompositeUserOperations.java +++ b/web-services/query/src/main/java/datawave/webservice/query/logic/composite/CompositeUserOperations.java @@ -30,11 +30,15 @@ public class CompositeUserOperations implements UserOperations { final ResponseObjectFactory responseObjectFactory; final List userOperations; final boolean includeLocal; - - public CompositeUserOperations(List remoteOperations, boolean includeLocal, ResponseObjectFactory responseObjectFactory) { + final boolean shortCircuitExecution; + + public CompositeUserOperations(List remoteOperations, boolean includeLocal, boolean shortCircuitExecution, + ResponseObjectFactory responseObjectFactory) { this.responseObjectFactory = responseObjectFactory; this.userOperations = remoteOperations; this.includeLocal = includeLocal; + // if shortCircuitExecution, then simply make our best effort meaning if any remote operations fail we will simply not include that one. + this.shortCircuitExecution = shortCircuitExecution; } @Override @@ -46,14 +50,21 @@ public AuthorizationsListBase listEffectiveAuthorizations(Object callerObject) t principal.getProxiedUsers().forEach(u -> authMap.put(dn(u.getDn()), new HashSet<>(u.getAuths()))); } for (UserOperations ops : userOperations) { - AuthorizationsListBase remoteAuths = ops.listEffectiveAuthorizations(callerObject); - AuthorizationsListBase.SubjectIssuerDNPair userDn = new AuthorizationsListBase.SubjectIssuerDNPair(remoteAuths.getUserDn(), - remoteAuths.getIssuerDn()); - authMap.put(userDn, Sets.union(authMap.containsKey(userDn) ? authMap.get(userDn) : Collections.emptySet(), remoteAuths.getAllAuths())); - Map> remoteAuthMap = remoteAuths.getAuths(); - for (Map.Entry> entry : remoteAuthMap.entrySet()) { - AuthorizationsListBase.SubjectIssuerDNPair dn = entry.getKey(); - authMap.put(dn, Sets.union(authMap.containsKey(dn) ? authMap.get(dn) : Collections.emptySet(), entry.getValue())); + try { + AuthorizationsListBase remoteAuths = ops.listEffectiveAuthorizations(callerObject); + AuthorizationsListBase.SubjectIssuerDNPair userDn = new AuthorizationsListBase.SubjectIssuerDNPair(remoteAuths.getUserDn(), + remoteAuths.getIssuerDn()); + authMap.put(userDn, Sets.union(authMap.containsKey(userDn) ? 
authMap.get(userDn) : Collections.emptySet(), remoteAuths.getAllAuths())); + Map> remoteAuthMap = remoteAuths.getAuths(); + for (Map.Entry> entry : remoteAuthMap.entrySet()) { + AuthorizationsListBase.SubjectIssuerDNPair dn = entry.getKey(); + authMap.put(dn, Sets.union(authMap.containsKey(dn) ? authMap.get(dn) : Collections.emptySet(), entry.getValue())); + } + } catch (Exception e) { + // ignore the exception if shortCircuitExecution is specified as we may never even call that remote logic + if (!shortCircuitExecution) { + throw new AuthorizationException(e); + } } } DatawaveUser primaryUser = principal.getPrimaryUser(); @@ -109,7 +120,14 @@ public DatawavePrincipal getRemoteUser(DatawavePrincipal principal) throws Autho principals.add(principal); } for (UserOperations ops : userOperations) { - principals.add(ops.getRemoteUser(principal)); + try { + principals.add(ops.getRemoteUser(principal)); + } catch (Exception e) { + // ignore the exception if shortCircuitExecution is specified as we may never even call that remote logic + if (!shortCircuitExecution) { + throw new AuthorizationException(e); + } + } } return WSAuthorizationsUtil.mergePrincipals(principals.toArray(new DatawavePrincipal[0])); From 76c9621c5c67a4ed748e5e3f737373e9a875945e Mon Sep 17 00:00:00 2001 From: Ivan Bella Date: Tue, 14 Nov 2023 21:55:58 +0000 Subject: [PATCH 16/32] formatting --- .../query/transformer/GroupingTransform.java | 2 +- .../query/logic/composite/CompositeQueryLogic.java | 14 +++++++------- .../logic/composite/CompositeUserOperations.java | 2 +- .../logic/composite/CompositeQueryLogicTest.java | 14 +++++++------- .../remote/RemoteUserOperationsImpl.java | 4 ++-- 5 files changed, 18 insertions(+), 18 deletions(-) diff --git a/warehouse/query-core/src/main/java/datawave/query/transformer/GroupingTransform.java b/warehouse/query-core/src/main/java/datawave/query/transformer/GroupingTransform.java index 52ad22b4fe..8327188210 100644 --- a/warehouse/query-core/src/main/java/datawave/query/transformer/GroupingTransform.java +++ b/warehouse/query-core/src/main/java/datawave/query/transformer/GroupingTransform.java @@ -113,7 +113,7 @@ public void setQueryExecutionForPageStartTime(long queryExecutionForPageStartTim log.debug("setting query execution page start time to {}", queryExecutionForPageStartTime); super.setQueryExecutionForPageStartTime(queryExecutionForPageStartTime); } - + @Override public Entry flush() { Document document = null; diff --git a/web-services/query/src/main/java/datawave/webservice/query/logic/composite/CompositeQueryLogic.java b/web-services/query/src/main/java/datawave/webservice/query/logic/composite/CompositeQueryLogic.java index 06aa1a01ef..ffcbc57df3 100644 --- a/web-services/query/src/main/java/datawave/webservice/query/logic/composite/CompositeQueryLogic.java +++ b/web-services/query/src/main/java/datawave/webservice/query/logic/composite/CompositeQueryLogic.java @@ -121,10 +121,10 @@ public void run() { startLatch.countDown(); started = true; } - + // ensure we start with a reasonable page time resetPageProcessingStartTime(); - + // the results queue is also an exception handler setUncaughtExceptionHandler(results); boolean success = false; @@ -138,7 +138,7 @@ public void run() { last = transformIterator.next(); if (null != last) { log.debug(Thread.currentThread().getName() + ": Got result"); - + // special logic to deal with intermediate results if (last instanceof EventBase && ((EventBase) last).isIntermediateResult()) { resetPageProcessingStartTime(); @@ -148,7 +148,7 @@ 
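The CompositeUserOperations changes above make authorization gathering best-effort when short-circuit execution is enabled. A compact sketch of that decision in isolation, using a made-up fetcher interface in place of the real UserOperations calls:

```java
// Sketch of the best-effort vs. fail-fast behavior added above; Fetcher is a hypothetical
// stand-in, and AuthorizationException mirrors the exception type used in the diff.
import java.util.ArrayList;
import java.util.List;

class LenientGatherSketch {

    interface Fetcher<T> {
        T fetch() throws Exception;
    }

    static class AuthorizationException extends Exception {
        AuthorizationException(Throwable cause) {
            super(cause);
        }
    }

    static <T> List<T> gather(List<Fetcher<T>> remotes, boolean shortCircuitExecution) throws AuthorizationException {
        List<T> collected = new ArrayList<>();
        for (Fetcher<T> remote : remotes) {
            try {
                collected.add(remote.fetch());
            } catch (Exception e) {
                // In short-circuit mode the remote logic may never be executed anyway, so a
                // failing source is simply skipped; otherwise the failure is propagated.
                if (!shortCircuitExecution) {
                    throw new AuthorizationException(e);
                }
            }
        }
        return collected;
    }
}
```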
public void run() { last = null; } } - + if (last != null) { results.add(last); resultCount++; @@ -178,7 +178,7 @@ public void run() { log.trace("Finished thread: " + this.getName() + " with success = " + success); } } - + public void resetPageProcessingStartTime() { logic.setPageProcessingStartTime(System.currentTimeMillis()); } @@ -634,7 +634,7 @@ public void setPageProcessingStartTime(long pageProcessingStartTime) { logic.setPageProcessingStartTime(pageProcessingStartTime); } } - + @Override public boolean isLongRunningQuery() { for (QueryLogic l : getQueryLogics().values()) { @@ -644,7 +644,7 @@ public boolean isLongRunningQuery() { } return false; } - + public boolean isAllMustInitialize() { return getConfig().isAllMustInitialize(); } diff --git a/web-services/query/src/main/java/datawave/webservice/query/logic/composite/CompositeUserOperations.java b/web-services/query/src/main/java/datawave/webservice/query/logic/composite/CompositeUserOperations.java index cd2b4880b9..21ec0077aa 100644 --- a/web-services/query/src/main/java/datawave/webservice/query/logic/composite/CompositeUserOperations.java +++ b/web-services/query/src/main/java/datawave/webservice/query/logic/composite/CompositeUserOperations.java @@ -31,7 +31,7 @@ public class CompositeUserOperations implements UserOperations { final List userOperations; final boolean includeLocal; final boolean shortCircuitExecution; - + public CompositeUserOperations(List remoteOperations, boolean includeLocal, boolean shortCircuitExecution, ResponseObjectFactory responseObjectFactory) { this.responseObjectFactory = responseObjectFactory; diff --git a/web-services/query/src/test/java/datawave/webservice/query/logic/composite/CompositeQueryLogicTest.java b/web-services/query/src/test/java/datawave/webservice/query/logic/composite/CompositeQueryLogicTest.java index eee4f84ee5..cdc7b152b4 100644 --- a/web-services/query/src/test/java/datawave/webservice/query/logic/composite/CompositeQueryLogicTest.java +++ b/web-services/query/src/test/java/datawave/webservice/query/logic/composite/CompositeQueryLogicTest.java @@ -397,7 +397,7 @@ public QueryLogicTransformer getTransformer(Query settings) { public GenericQueryConfiguration initialize(AccumuloClient client, Query settings, Set runtimeQueryAuthorizations) throws Exception { return new TestQueryConfiguration(); } - + @Override public boolean isLongRunningQuery() { return true; @@ -1438,20 +1438,20 @@ public void testIsLongRunningQuery() throws Exception { TestQueryLogic logic2 = new TestQueryLogic(); logics.put("TestQueryLogic", logic1); logics.put("TestQueryLogic2", logic2); - + CompositeQueryLogic c = new CompositeQueryLogic(); c.setQueryLogics(logics); - + Assert.assertFalse(c.isLongRunningQuery()); - + TestQueryLogic2 logic3 = new TestQueryLogic2(); logics.put("TestQueryLogic3", logic3); - + c.setQueryLogics(logics); - + Assert.assertTrue(c.isLongRunningQuery()); } - + @Test public void testAuthorizationsUpdate() throws Exception { Map> logics = new HashMap<>(); diff --git a/web-services/security/src/main/java/datawave/security/authorization/remote/RemoteUserOperationsImpl.java b/web-services/security/src/main/java/datawave/security/authorization/remote/RemoteUserOperationsImpl.java index b11ceb41c1..4459b92d55 100644 --- a/web-services/security/src/main/java/datawave/security/authorization/remote/RemoteUserOperationsImpl.java +++ b/web-services/security/src/main/java/datawave/security/authorization/remote/RemoteUserOperationsImpl.java @@ -55,7 +55,7 @@ public DatawavePrincipal 
getRemoteUser(DatawavePrincipal principal) throws Autho log.info("Cache fault: Retrieving user for " + principal.getPrimaryUser().getDn()); return UserOperations.super.getRemoteUser(principal); } - + @Override @Cacheable(value = "listEffectiveAuthorizations", key = "{#callerObject}", cacheManager = "remoteOperationsCacheManager") public AuthorizationsListBase listEffectiveAuthorizations(Object callerObject) throws AuthorizationException { @@ -91,7 +91,7 @@ public GenericResponse flushCachedCredentials(Object callerObject) throw return readResponse(entity, genericResponseReader); }, () -> suffix); } - + private DatawavePrincipal getDatawavePrincipal(Object callerObject) { if (callerObject instanceof DatawavePrincipal) { return (DatawavePrincipal) callerObject; From 3e4037cd54f9219591a43ef83ae08d1542198d3b Mon Sep 17 00:00:00 2001 From: Ivan Bella Date: Wed, 15 Nov 2023 18:05:03 +0000 Subject: [PATCH 17/32] Updated type-utils from 2.0.0 to 2.0.1 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index f6fb0a3ee7..c22f7dae7e 100644 --- a/pom.xml +++ b/pom.xml @@ -94,7 +94,7 @@ 3.0.0 2.0.0 3.0.0 - 2.0.0 + 2.0.1 1.2 2.23.0 8.0.16 From 313fa9a3e9e6a16abc3188ed6130ee4572a9a4e0 Mon Sep 17 00:00:00 2001 From: jeff <1583214+jschmidt10@users.noreply.github.com> Date: Fri, 17 Nov 2023 12:31:07 -0500 Subject: [PATCH 18/32] Stop caching tableIds in ConfigurableAgeOffFilter (#2161) * Stop tying ConfigurableAgeOffIterators refresh thread to table id * Missing semicolon * Revert back to protected variable --------- Co-authored-by: Jeffrey Schmidt jjschm4 Co-authored-by: hgklohr --- .../filter/ConfigurableAgeOffFilter.java | 17 +++++++++-------- .../filter/ConfigurableAgeOffFilterTest.java | 11 ++++++++++- 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/warehouse/core/src/main/java/datawave/iterators/filter/ConfigurableAgeOffFilter.java b/warehouse/core/src/main/java/datawave/iterators/filter/ConfigurableAgeOffFilter.java index 2876a46fcb..0885f676d4 100644 --- a/warehouse/core/src/main/java/datawave/iterators/filter/ConfigurableAgeOffFilter.java +++ b/warehouse/core/src/main/java/datawave/iterators/filter/ConfigurableAgeOffFilter.java @@ -8,13 +8,12 @@ import java.util.Iterator; import java.util.List; import java.util.Map; -import java.util.Objects; import java.util.TreeMap; import java.util.concurrent.ExecutionException; import java.util.concurrent.ScheduledThreadPoolExecutor; import java.util.concurrent.TimeUnit; -import org.apache.accumulo.core.conf.AccumuloConfiguration; +import org.apache.accumulo.core.client.PluginEnvironment; import org.apache.accumulo.core.data.Key; import org.apache.accumulo.core.data.Value; import org.apache.accumulo.core.iterators.Filter; @@ -137,6 +136,8 @@ public class ConfigurableAgeOffFilter extends Filter implements OptionDescriber protected IteratorEnvironment myEnv; + private PluginEnvironment pluginEnv; + // Adding the ability to disable the filter checks in the case of a system-initialized major compaction for example. // The thought is that we force compactions where we want the data to aged off. // The system-initialized compactions are on data just imported in which case they are not expected to remove much. @@ -199,6 +200,7 @@ public boolean accept(Key k, Value v) { public SortedKeyValueIterator deepCopy(IteratorEnvironment env) { myEnv = env; + pluginEnv = env == null ? 
null : env.getPluginEnv(); return ((ConfigurableAgeOffFilter) super.deepCopy(env)).initialize(this); } @@ -312,6 +314,7 @@ public void init(SortedKeyValueIterator source, Map op super.init(source, options, env); myEnv = env; + pluginEnv = env == null ? null : env.getPluginEnv(); // disabled if this is a system initialized major compaction and we are configured to disable as such String disableOnNonFullMajcStr = options.get(AgeOffConfigParams.DISABLE_ON_NON_FULL_MAJC); @@ -394,12 +397,10 @@ private void initFilterRules() throws IllegalArgumentException, IOException { } private long getLongProperty(final String prop, final long defaultValue) { - if (this.myEnv != null && this.myEnv.getConfig() != null) { - AccumuloConfiguration conf = this.myEnv.getConfig(); - Map properties = new TreeMap<>(); - conf.getProperties(properties, p -> Objects.equals(prop, p)); - if (properties.containsKey(prop)) { - return Long.parseLong(properties.get(prop)); + if (pluginEnv != null && pluginEnv.getConfiguration() != null) { + String propValue = pluginEnv.getConfiguration().get(prop); + if (propValue != null) { + return Long.parseLong(propValue); } } return defaultValue; diff --git a/warehouse/core/src/test/java/datawave/iterators/filter/ConfigurableAgeOffFilterTest.java b/warehouse/core/src/test/java/datawave/iterators/filter/ConfigurableAgeOffFilterTest.java index c8bc8b94eb..c8d5f0411f 100644 --- a/warehouse/core/src/test/java/datawave/iterators/filter/ConfigurableAgeOffFilterTest.java +++ b/warehouse/core/src/test/java/datawave/iterators/filter/ConfigurableAgeOffFilterTest.java @@ -15,6 +15,7 @@ import java.util.List; import java.util.Map; +import org.apache.accumulo.core.client.PluginEnvironment; import org.apache.accumulo.core.conf.AccumuloConfiguration; import org.apache.accumulo.core.conf.DefaultConfiguration; import org.apache.accumulo.core.data.Key; @@ -22,6 +23,7 @@ import org.apache.accumulo.core.iterators.IteratorEnvironment; import org.apache.accumulo.core.iterators.IteratorUtil; import org.apache.accumulo.core.iterators.SortedKeyValueIterator; +import org.apache.accumulo.core.util.ConfigurationImpl; import org.easymock.EasyMockRunner; import org.easymock.EasyMockSupport; import org.easymock.Mock; @@ -42,17 +44,24 @@ public class ConfigurableAgeOffFilterTest extends EasyMockSupport { @Mock private IteratorEnvironment env; @Mock + private PluginEnvironment pluginEnv; + @Mock private SortedKeyValueIterator source; private AccumuloConfiguration conf = DefaultConfiguration.getInstance(); @Before public void setUp() throws Exception { + expect(pluginEnv.getConfiguration()).andReturn(new ConfigurationImpl(conf)).anyTimes(); + expect(env.getConfig()).andReturn(conf).anyTimes(); + expect(env.getPluginEnv()).andReturn(pluginEnv).anyTimes(); + // These two are only for the disabled test expect(env.getIteratorScope()).andReturn(IteratorUtil.IteratorScope.majc).anyTimes(); expect(env.isFullMajorCompaction()).andReturn(false).anyTimes(); - replay(env); + + replay(env, pluginEnv); } @Test From 86386a0bef32cbb44ff2b793bd26e5890c141caa Mon Sep 17 00:00:00 2001 From: hgklohr Date: Fri, 17 Nov 2023 17:42:36 +0000 Subject: [PATCH 19/32] Update pom's for 6.3.0-SNAPSHOT --- common-test/pom.xml | 2 +- core/pom.xml | 2 +- core/utils/pom.xml | 2 +- docs/pom.xml | 2 +- microservices/pom.xml | 2 +- microservices/services/pom.xml | 2 +- microservices/starters/pom.xml | 2 +- pom.xml | 2 +- warehouse/accumulo-extensions/pom.xml | 2 +- warehouse/assemble/datawave/pom.xml | 2 +- warehouse/assemble/pom.xml | 2 +- 
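The getLongProperty() rewrite above replaces the table-id-bound AccumuloConfiguration scan with a lookup through the iterator's PluginEnvironment. A small sketch of that lookup in isolation; the property name shown is invented for illustration and is not one DataWave actually uses.

```java
// Sketch of reading a numeric override through the PluginEnvironment handed to an iterator.
import org.apache.accumulo.core.client.PluginEnvironment;
import org.apache.accumulo.core.iterators.IteratorEnvironment;

final class PluginEnvPropertySketch {

    static long getLongProperty(IteratorEnvironment env, String prop, long defaultValue) {
        PluginEnvironment pluginEnv = (env == null) ? null : env.getPluginEnv();
        if (pluginEnv != null && pluginEnv.getConfiguration() != null) {
            // e.g. prop = "table.custom.ageoff.scan.interval" (hypothetical example property)
            String value = pluginEnv.getConfiguration().get(prop);
            if (value != null) {
                return Long.parseLong(value);
            }
        }
        return defaultValue;
    }
}
```

Because the lookup goes through the plugin environment rather than a cached per-table configuration, it no longer depends on a table id, which is the point of the change described in the commit message above.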
warehouse/assemble/webservice/pom.xml | 2 +- warehouse/common/pom.xml | 2 +- warehouse/core/pom.xml | 2 +- warehouse/data-dictionary-core/pom.xml | 2 +- warehouse/edge-dictionary-core/pom.xml | 2 +- warehouse/edge-model-configuration-core/pom.xml | 2 +- warehouse/index-stats/pom.xml | 2 +- warehouse/ingest-configuration/pom.xml | 2 +- warehouse/ingest-core/pom.xml | 2 +- warehouse/ingest-csv/pom.xml | 2 +- warehouse/ingest-json/pom.xml | 2 +- warehouse/ingest-nyctlc/pom.xml | 2 +- warehouse/ingest-scripts/pom.xml | 2 +- warehouse/ingest-wikipedia/pom.xml | 2 +- warehouse/metrics-core/pom.xml | 2 +- warehouse/ops-tools/config-compare/pom.xml | 2 +- warehouse/ops-tools/index-validation/pom.xml | 2 +- warehouse/ops-tools/pom.xml | 2 +- warehouse/pom.xml | 2 +- warehouse/query-core/pom.xml | 2 +- warehouse/regression-testing/pom.xml | 2 +- web-services/accumulo/pom.xml | 2 +- web-services/atom/pom.xml | 2 +- web-services/cached-results/pom.xml | 2 +- web-services/client/pom.xml | 2 +- web-services/common-util/pom.xml | 2 +- web-services/common/pom.xml | 2 +- web-services/deploy/application/pom.xml | 2 +- web-services/deploy/configuration/pom.xml | 2 +- web-services/deploy/docs/pom.xml | 2 +- web-services/deploy/pom.xml | 2 +- web-services/deploy/spring-framework-integration/pom.xml | 2 +- web-services/dictionary/pom.xml | 2 +- web-services/examples/client-login/pom.xml | 2 +- web-services/examples/http-client/pom.xml | 2 +- web-services/examples/jms-client/pom.xml | 2 +- web-services/examples/pom.xml | 2 +- web-services/examples/query-war/pom.xml | 2 +- web-services/map-reduce-embedded/pom.xml | 2 +- web-services/map-reduce-status/pom.xml | 2 +- web-services/map-reduce/pom.xml | 2 +- web-services/model/pom.xml | 2 +- web-services/modification/pom.xml | 2 +- web-services/pom.xml | 2 +- web-services/query-websocket/pom.xml | 2 +- web-services/query/pom.xml | 2 +- web-services/rest-api/pom.xml | 2 +- web-services/security/pom.xml | 2 +- web-services/web-root/pom.xml | 2 +- 60 files changed, 60 insertions(+), 60 deletions(-) diff --git a/common-test/pom.xml b/common-test/pom.xml index aff6dea1bd..405fa34e81 100644 --- a/common-test/pom.xml +++ b/common-test/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-parent - 6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT datawave-common-test ${project.artifactId} diff --git a/core/pom.xml b/core/pom.xml index ac69dd13f2..c8024bedf8 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-parent - 6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT gov.nsa.datawave.core datawave-core-parent diff --git a/core/utils/pom.xml b/core/utils/pom.xml index 24055c6f50..d08cd61b66 100644 --- a/core/utils/pom.xml +++ b/core/utils/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.core datawave-core-parent - 6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT gov.nsa.datawave.core datawave-utils-parent diff --git a/docs/pom.xml b/docs/pom.xml index 974f5490c6..eb7ccee507 100644 --- a/docs/pom.xml +++ b/docs/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-parent - 6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT datawave-docs diff --git a/microservices/pom.xml b/microservices/pom.xml index e36c5b7a4c..1b799e5059 100644 --- a/microservices/pom.xml +++ b/microservices/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-parent - 6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT gov.nsa.datawave.microservice datawave-microservice-build-parent diff --git a/microservices/services/pom.xml b/microservices/services/pom.xml index a25ba38643..135a06eb44 100644 --- a/microservices/services/pom.xml +++ b/microservices/services/pom.xml @@ -4,7 +4,7 @@ 
gov.nsa.datawave.microservice datawave-microservice-build-parent - 6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT datawave-microservice-service-build-parent pom diff --git a/microservices/starters/pom.xml b/microservices/starters/pom.xml index aafabbe941..7dc0ecdc59 100644 --- a/microservices/starters/pom.xml +++ b/microservices/starters/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.microservice datawave-microservice-build-parent - 6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT datawave-microservice-starter-build-parent pom diff --git a/pom.xml b/pom.xml index c22f7dae7e..1cd6c2bf12 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ 4.0.0 gov.nsa.datawave datawave-parent - 6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT pom DataWave DataWave is a Java-based ingest and query framework that leverages Apache Accumulo to provide fast, secure access to your data. diff --git a/warehouse/accumulo-extensions/pom.xml b/warehouse/accumulo-extensions/pom.xml index 3932405612..a1e6435301 100644 --- a/warehouse/accumulo-extensions/pom.xml +++ b/warehouse/accumulo-extensions/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT datawave-accumulo-extensions ${project.artifactId} diff --git a/warehouse/assemble/datawave/pom.xml b/warehouse/assemble/datawave/pom.xml index 78c60a7308..bab3f46aad 100644 --- a/warehouse/assemble/datawave/pom.xml +++ b/warehouse/assemble/datawave/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave assemble-parent - 6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT assemble-datawave pom diff --git a/warehouse/assemble/pom.xml b/warehouse/assemble/pom.xml index c47bd27672..685f3453ea 100644 --- a/warehouse/assemble/pom.xml +++ b/warehouse/assemble/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT assemble-parent pom diff --git a/warehouse/assemble/webservice/pom.xml b/warehouse/assemble/webservice/pom.xml index 472697b007..50072ecf43 100644 --- a/warehouse/assemble/webservice/pom.xml +++ b/warehouse/assemble/webservice/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave assemble-parent - 6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT assemble-webservice ${project.artifactId} diff --git a/warehouse/common/pom.xml b/warehouse/common/pom.xml index 1120d8779b..27d9d3733d 100644 --- a/warehouse/common/pom.xml +++ b/warehouse/common/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT datawave-common ${project.artifactId} diff --git a/warehouse/core/pom.xml b/warehouse/core/pom.xml index 39033caea7..cb760b5e35 100644 --- a/warehouse/core/pom.xml +++ b/warehouse/core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT datawave-core jar diff --git a/warehouse/data-dictionary-core/pom.xml b/warehouse/data-dictionary-core/pom.xml index 08a9504422..bf62ab8538 100644 --- a/warehouse/data-dictionary-core/pom.xml +++ b/warehouse/data-dictionary-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT datawave-data-dictionary-core jar diff --git a/warehouse/edge-dictionary-core/pom.xml b/warehouse/edge-dictionary-core/pom.xml index 0e66a8adfc..47ab7673e3 100644 --- a/warehouse/edge-dictionary-core/pom.xml +++ b/warehouse/edge-dictionary-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT datawave-edge-dictionary-core jar diff --git a/warehouse/edge-model-configuration-core/pom.xml b/warehouse/edge-model-configuration-core/pom.xml index 02c2f1c0c5..4311693d9b 100644 --- 
a/warehouse/edge-model-configuration-core/pom.xml +++ b/warehouse/edge-model-configuration-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT datawave-edge-model-configuration-core jar diff --git a/warehouse/index-stats/pom.xml b/warehouse/index-stats/pom.xml index 48dc0d24db..12c18cd06e 100644 --- a/warehouse/index-stats/pom.xml +++ b/warehouse/index-stats/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT datawave-index-stats jar diff --git a/warehouse/ingest-configuration/pom.xml b/warehouse/ingest-configuration/pom.xml index bfa7ae2aef..3b35b21fbf 100644 --- a/warehouse/ingest-configuration/pom.xml +++ b/warehouse/ingest-configuration/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT datawave-ingest-configuration diff --git a/warehouse/ingest-core/pom.xml b/warehouse/ingest-core/pom.xml index c85cd46e7b..dc76b25fd8 100644 --- a/warehouse/ingest-core/pom.xml +++ b/warehouse/ingest-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT datawave-ingest-core jar diff --git a/warehouse/ingest-csv/pom.xml b/warehouse/ingest-csv/pom.xml index 469c5a3edc..2bb1a28779 100644 --- a/warehouse/ingest-csv/pom.xml +++ b/warehouse/ingest-csv/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT datawave-ingest-csv jar diff --git a/warehouse/ingest-json/pom.xml b/warehouse/ingest-json/pom.xml index 071865d3e2..7550b86904 100644 --- a/warehouse/ingest-json/pom.xml +++ b/warehouse/ingest-json/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT datawave-ingest-json jar diff --git a/warehouse/ingest-nyctlc/pom.xml b/warehouse/ingest-nyctlc/pom.xml index 615f06435d..8aa3477664 100644 --- a/warehouse/ingest-nyctlc/pom.xml +++ b/warehouse/ingest-nyctlc/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT datawave-ingest-nyctlc jar diff --git a/warehouse/ingest-scripts/pom.xml b/warehouse/ingest-scripts/pom.xml index f5fa216a2a..42bdce42fd 100644 --- a/warehouse/ingest-scripts/pom.xml +++ b/warehouse/ingest-scripts/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT datawave-ingest-scripts ${project.artifactId} diff --git a/warehouse/ingest-wikipedia/pom.xml b/warehouse/ingest-wikipedia/pom.xml index fa8b597b91..2e472e2483 100644 --- a/warehouse/ingest-wikipedia/pom.xml +++ b/warehouse/ingest-wikipedia/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT datawave-ingest-wikipedia jar diff --git a/warehouse/metrics-core/pom.xml b/warehouse/metrics-core/pom.xml index 106b05001a..aac2a6ff79 100644 --- a/warehouse/metrics-core/pom.xml +++ b/warehouse/metrics-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT datawave-metrics-core jar diff --git a/warehouse/ops-tools/config-compare/pom.xml b/warehouse/ops-tools/config-compare/pom.xml index 83cc229db2..c88f600a06 100644 --- a/warehouse/ops-tools/config-compare/pom.xml +++ b/warehouse/ops-tools/config-compare/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-ops-tools-parent - 6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT datawave-ops-tools-config-compare diff --git a/warehouse/ops-tools/index-validation/pom.xml b/warehouse/ops-tools/index-validation/pom.xml index 822e8f2137..7c65c30a05 
100644 --- a/warehouse/ops-tools/index-validation/pom.xml +++ b/warehouse/ops-tools/index-validation/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-ops-tools-parent - 6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT datawave-ops-tools-index-validation jar diff --git a/warehouse/ops-tools/pom.xml b/warehouse/ops-tools/pom.xml index 7d4c0307b4..6597d9132b 100644 --- a/warehouse/ops-tools/pom.xml +++ b/warehouse/ops-tools/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT datawave-ops-tools-parent pom diff --git a/warehouse/pom.xml b/warehouse/pom.xml index 8f4e14cdd5..7a5792344e 100644 --- a/warehouse/pom.xml +++ b/warehouse/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-parent - 6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT datawave-warehouse-parent pom diff --git a/warehouse/query-core/pom.xml b/warehouse/query-core/pom.xml index 9051a0c515..cb83b19629 100644 --- a/warehouse/query-core/pom.xml +++ b/warehouse/query-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT datawave-query-core jar diff --git a/warehouse/regression-testing/pom.xml b/warehouse/regression-testing/pom.xml index 46c31eefdd..0c9b4ceb14 100644 --- a/warehouse/regression-testing/pom.xml +++ b/warehouse/regression-testing/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT datawave-regression-testing ${project.artifactId} diff --git a/web-services/accumulo/pom.xml b/web-services/accumulo/pom.xml index 5c231fe4bb..6f476cd143 100644 --- a/web-services/accumulo/pom.xml +++ b/web-services/accumulo/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT datawave-ws-accumulo ejb diff --git a/web-services/atom/pom.xml b/web-services/atom/pom.xml index d376a08f2c..a54a427099 100644 --- a/web-services/atom/pom.xml +++ b/web-services/atom/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT datawave-ws-atom ejb diff --git a/web-services/cached-results/pom.xml b/web-services/cached-results/pom.xml index 49940b838e..2f77938857 100644 --- a/web-services/cached-results/pom.xml +++ b/web-services/cached-results/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT datawave-ws-cached-results ejb diff --git a/web-services/client/pom.xml b/web-services/client/pom.xml index 183ffbc9fb..cf9614748c 100644 --- a/web-services/client/pom.xml +++ b/web-services/client/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT datawave-ws-client jar diff --git a/web-services/common-util/pom.xml b/web-services/common-util/pom.xml index 727136df2e..e653d6d048 100644 --- a/web-services/common-util/pom.xml +++ b/web-services/common-util/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT datawave-ws-common-util jar diff --git a/web-services/common/pom.xml b/web-services/common/pom.xml index c76f78a65d..a899440f64 100644 --- a/web-services/common/pom.xml +++ b/web-services/common/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT datawave-ws-common ejb diff --git a/web-services/deploy/application/pom.xml b/web-services/deploy/application/pom.xml index 1ea3bf96b6..824993588d 100644 --- a/web-services/deploy/application/pom.xml +++ b/web-services/deploy/application/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices 
datawave-ws-deploy-parent - 6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT datawave-ws-deploy-application ear diff --git a/web-services/deploy/configuration/pom.xml b/web-services/deploy/configuration/pom.xml index 550641f0cf..1632c47eb5 100644 --- a/web-services/deploy/configuration/pom.xml +++ b/web-services/deploy/configuration/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-deploy-parent - 6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT datawave-ws-deploy-configuration jar diff --git a/web-services/deploy/docs/pom.xml b/web-services/deploy/docs/pom.xml index fd39ec227f..cb39f04b6d 100644 --- a/web-services/deploy/docs/pom.xml +++ b/web-services/deploy/docs/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-deploy-parent - 6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT datawave-ws-deploy-docs war diff --git a/web-services/deploy/pom.xml b/web-services/deploy/pom.xml index a06c2353cc..08b04cdf30 100644 --- a/web-services/deploy/pom.xml +++ b/web-services/deploy/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT gov.nsa.datawave.webservices datawave-ws-deploy-parent diff --git a/web-services/deploy/spring-framework-integration/pom.xml b/web-services/deploy/spring-framework-integration/pom.xml index be7c3fba17..687a6581f6 100644 --- a/web-services/deploy/spring-framework-integration/pom.xml +++ b/web-services/deploy/spring-framework-integration/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-deploy-parent - 6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT spring-framework-integration ${project.artifactId} diff --git a/web-services/dictionary/pom.xml b/web-services/dictionary/pom.xml index cf464ebdb3..320d6380d7 100644 --- a/web-services/dictionary/pom.xml +++ b/web-services/dictionary/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT datawave-ws-dictionary ejb diff --git a/web-services/examples/client-login/pom.xml b/web-services/examples/client-login/pom.xml index 33b61757aa..8b5dae10fe 100644 --- a/web-services/examples/client-login/pom.xml +++ b/web-services/examples/client-login/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-examples-parent - 6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT datawave-ws-examples-client-login ejb diff --git a/web-services/examples/http-client/pom.xml b/web-services/examples/http-client/pom.xml index dd881d6bbe..47de7e3865 100644 --- a/web-services/examples/http-client/pom.xml +++ b/web-services/examples/http-client/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-examples-parent - 6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT datawave-ws-examples-http-client jar diff --git a/web-services/examples/jms-client/pom.xml b/web-services/examples/jms-client/pom.xml index 16b4062827..8c05cb89aa 100644 --- a/web-services/examples/jms-client/pom.xml +++ b/web-services/examples/jms-client/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-examples-parent - 6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT datawave-ws-examples-jms-client jar diff --git a/web-services/examples/pom.xml b/web-services/examples/pom.xml index b2f96c2db1..180d3b0ef5 100644 --- a/web-services/examples/pom.xml +++ b/web-services/examples/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT datawave-ws-examples-parent pom diff --git a/web-services/examples/query-war/pom.xml b/web-services/examples/query-war/pom.xml index 99df0b58dc..650b073a94 100644 --- a/web-services/examples/query-war/pom.xml +++ b/web-services/examples/query-war/pom.xml @@ 
-4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-examples-parent - 6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT datawave-ws-examples-query-war war diff --git a/web-services/map-reduce-embedded/pom.xml b/web-services/map-reduce-embedded/pom.xml index fd28d8a678..b65b392760 100644 --- a/web-services/map-reduce-embedded/pom.xml +++ b/web-services/map-reduce-embedded/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT datawave-ws-map-reduce-embedded jar diff --git a/web-services/map-reduce-status/pom.xml b/web-services/map-reduce-status/pom.xml index 1f6a3cc05f..fa50ca78d8 100644 --- a/web-services/map-reduce-status/pom.xml +++ b/web-services/map-reduce-status/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT datawave-ws-map-reduce-status ejb diff --git a/web-services/map-reduce/pom.xml b/web-services/map-reduce/pom.xml index 6fcd98a75f..e3283ebb67 100644 --- a/web-services/map-reduce/pom.xml +++ b/web-services/map-reduce/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT datawave-ws-map-reduce ejb diff --git a/web-services/model/pom.xml b/web-services/model/pom.xml index 525527b6a4..12611980f9 100644 --- a/web-services/model/pom.xml +++ b/web-services/model/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT datawave-ws-model ejb diff --git a/web-services/modification/pom.xml b/web-services/modification/pom.xml index 3a83535b59..fa4b4a8d3b 100644 --- a/web-services/modification/pom.xml +++ b/web-services/modification/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT datawave-ws-modification ejb diff --git a/web-services/pom.xml b/web-services/pom.xml index 193861d42c..599971cb49 100644 --- a/web-services/pom.xml +++ b/web-services/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-parent - 6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT gov.nsa.datawave.webservices datawave-ws-parent diff --git a/web-services/query-websocket/pom.xml b/web-services/query-websocket/pom.xml index e6b26792de..48f9a17255 100644 --- a/web-services/query-websocket/pom.xml +++ b/web-services/query-websocket/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT datawave-ws-query-websocket war diff --git a/web-services/query/pom.xml b/web-services/query/pom.xml index ba56c57529..f130628adf 100644 --- a/web-services/query/pom.xml +++ b/web-services/query/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT datawave-ws-query ejb diff --git a/web-services/rest-api/pom.xml b/web-services/rest-api/pom.xml index b6b42df740..7408e63c52 100644 --- a/web-services/rest-api/pom.xml +++ b/web-services/rest-api/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT datawave-ws-rest-api war diff --git a/web-services/security/pom.xml b/web-services/security/pom.xml index 2c6b9670e9..509af5634c 100644 --- a/web-services/security/pom.xml +++ b/web-services/security/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT datawave-ws-security ejb diff --git a/web-services/web-root/pom.xml b/web-services/web-root/pom.xml index 8897003cf4..c0e19fa04f 100644 --- a/web-services/web-root/pom.xml +++ b/web-services/web-root/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 
6.2.0-SNAPSHOT + 6.3.0-SNAPSHOT datawave-ws-web-root war From af505a725dfc3a10a8352af5a83baf152be23b25 Mon Sep 17 00:00:00 2001 From: hgklohr Date: Fri, 17 Nov 2023 17:43:57 +0000 Subject: [PATCH 20/32] 6.2.0 --- common-test/pom.xml | 2 +- core/pom.xml | 2 +- core/utils/pom.xml | 2 +- docs/pom.xml | 2 +- microservices/pom.xml | 2 +- microservices/services/pom.xml | 2 +- microservices/starters/pom.xml | 2 +- pom.xml | 2 +- warehouse/accumulo-extensions/pom.xml | 2 +- warehouse/assemble/datawave/pom.xml | 2 +- warehouse/assemble/pom.xml | 2 +- warehouse/assemble/webservice/pom.xml | 2 +- warehouse/common/pom.xml | 2 +- warehouse/core/pom.xml | 2 +- warehouse/data-dictionary-core/pom.xml | 2 +- warehouse/edge-dictionary-core/pom.xml | 2 +- warehouse/edge-model-configuration-core/pom.xml | 2 +- warehouse/index-stats/pom.xml | 2 +- warehouse/ingest-configuration/pom.xml | 2 +- warehouse/ingest-core/pom.xml | 2 +- warehouse/ingest-csv/pom.xml | 2 +- warehouse/ingest-json/pom.xml | 2 +- warehouse/ingest-nyctlc/pom.xml | 2 +- warehouse/ingest-scripts/pom.xml | 2 +- warehouse/ingest-wikipedia/pom.xml | 2 +- warehouse/metrics-core/pom.xml | 2 +- warehouse/ops-tools/config-compare/pom.xml | 2 +- warehouse/ops-tools/index-validation/pom.xml | 2 +- warehouse/ops-tools/pom.xml | 2 +- warehouse/pom.xml | 2 +- warehouse/query-core/pom.xml | 2 +- warehouse/regression-testing/pom.xml | 2 +- web-services/accumulo/pom.xml | 2 +- web-services/atom/pom.xml | 2 +- web-services/cached-results/pom.xml | 2 +- web-services/client/pom.xml | 2 +- web-services/common-util/pom.xml | 2 +- web-services/common/pom.xml | 2 +- web-services/deploy/application/pom.xml | 2 +- web-services/deploy/configuration/pom.xml | 2 +- web-services/deploy/docs/pom.xml | 2 +- web-services/deploy/pom.xml | 2 +- web-services/deploy/spring-framework-integration/pom.xml | 2 +- web-services/dictionary/pom.xml | 2 +- web-services/examples/client-login/pom.xml | 2 +- web-services/examples/http-client/pom.xml | 2 +- web-services/examples/jms-client/pom.xml | 2 +- web-services/examples/pom.xml | 2 +- web-services/examples/query-war/pom.xml | 2 +- web-services/map-reduce-embedded/pom.xml | 2 +- web-services/map-reduce-status/pom.xml | 2 +- web-services/map-reduce/pom.xml | 2 +- web-services/model/pom.xml | 2 +- web-services/modification/pom.xml | 2 +- web-services/pom.xml | 2 +- web-services/query-websocket/pom.xml | 2 +- web-services/query/pom.xml | 2 +- web-services/rest-api/pom.xml | 2 +- web-services/security/pom.xml | 2 +- web-services/web-root/pom.xml | 2 +- 60 files changed, 60 insertions(+), 60 deletions(-) diff --git a/common-test/pom.xml b/common-test/pom.xml index aff6dea1bd..88c3a3c3a6 100644 --- a/common-test/pom.xml +++ b/common-test/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-parent - 6.2.0-SNAPSHOT + 6.2.0 datawave-common-test ${project.artifactId} diff --git a/core/pom.xml b/core/pom.xml index ac69dd13f2..6d24bbbd36 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-parent - 6.2.0-SNAPSHOT + 6.2.0 gov.nsa.datawave.core datawave-core-parent diff --git a/core/utils/pom.xml b/core/utils/pom.xml index 24055c6f50..2175f7b953 100644 --- a/core/utils/pom.xml +++ b/core/utils/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.core datawave-core-parent - 6.2.0-SNAPSHOT + 6.2.0 gov.nsa.datawave.core datawave-utils-parent diff --git a/docs/pom.xml b/docs/pom.xml index 974f5490c6..f48490b7d1 100644 --- a/docs/pom.xml +++ b/docs/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-parent - 6.2.0-SNAPSHOT + 6.2.0 
datawave-docs diff --git a/microservices/pom.xml b/microservices/pom.xml index e36c5b7a4c..a0555825b2 100644 --- a/microservices/pom.xml +++ b/microservices/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-parent - 6.2.0-SNAPSHOT + 6.2.0 gov.nsa.datawave.microservice datawave-microservice-build-parent diff --git a/microservices/services/pom.xml b/microservices/services/pom.xml index a25ba38643..2ca1136957 100644 --- a/microservices/services/pom.xml +++ b/microservices/services/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.microservice datawave-microservice-build-parent - 6.2.0-SNAPSHOT + 6.2.0 datawave-microservice-service-build-parent pom diff --git a/microservices/starters/pom.xml b/microservices/starters/pom.xml index aafabbe941..4a0f9f7f6d 100644 --- a/microservices/starters/pom.xml +++ b/microservices/starters/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.microservice datawave-microservice-build-parent - 6.2.0-SNAPSHOT + 6.2.0 datawave-microservice-starter-build-parent pom diff --git a/pom.xml b/pom.xml index c22f7dae7e..a3b4c544b9 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ 4.0.0 gov.nsa.datawave datawave-parent - 6.2.0-SNAPSHOT + 6.2.0 pom DataWave DataWave is a Java-based ingest and query framework that leverages Apache Accumulo to provide fast, secure access to your data. diff --git a/warehouse/accumulo-extensions/pom.xml b/warehouse/accumulo-extensions/pom.xml index 3932405612..9483669f51 100644 --- a/warehouse/accumulo-extensions/pom.xml +++ b/warehouse/accumulo-extensions/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.2.0-SNAPSHOT + 6.2.0 datawave-accumulo-extensions ${project.artifactId} diff --git a/warehouse/assemble/datawave/pom.xml b/warehouse/assemble/datawave/pom.xml index 78c60a7308..b3f4a867b8 100644 --- a/warehouse/assemble/datawave/pom.xml +++ b/warehouse/assemble/datawave/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave assemble-parent - 6.2.0-SNAPSHOT + 6.2.0 assemble-datawave pom diff --git a/warehouse/assemble/pom.xml b/warehouse/assemble/pom.xml index c47bd27672..2bf419a44f 100644 --- a/warehouse/assemble/pom.xml +++ b/warehouse/assemble/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.2.0-SNAPSHOT + 6.2.0 assemble-parent pom diff --git a/warehouse/assemble/webservice/pom.xml b/warehouse/assemble/webservice/pom.xml index 472697b007..e5d55a22c4 100644 --- a/warehouse/assemble/webservice/pom.xml +++ b/warehouse/assemble/webservice/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave assemble-parent - 6.2.0-SNAPSHOT + 6.2.0 assemble-webservice ${project.artifactId} diff --git a/warehouse/common/pom.xml b/warehouse/common/pom.xml index 1120d8779b..ff4cec2dac 100644 --- a/warehouse/common/pom.xml +++ b/warehouse/common/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.2.0-SNAPSHOT + 6.2.0 datawave-common ${project.artifactId} diff --git a/warehouse/core/pom.xml b/warehouse/core/pom.xml index 39033caea7..e2f7e35985 100644 --- a/warehouse/core/pom.xml +++ b/warehouse/core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.2.0-SNAPSHOT + 6.2.0 datawave-core jar diff --git a/warehouse/data-dictionary-core/pom.xml b/warehouse/data-dictionary-core/pom.xml index 08a9504422..d6b24ba8b2 100644 --- a/warehouse/data-dictionary-core/pom.xml +++ b/warehouse/data-dictionary-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.2.0-SNAPSHOT + 6.2.0 datawave-data-dictionary-core jar diff --git a/warehouse/edge-dictionary-core/pom.xml b/warehouse/edge-dictionary-core/pom.xml index 
0e66a8adfc..745129f200 100644 --- a/warehouse/edge-dictionary-core/pom.xml +++ b/warehouse/edge-dictionary-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.2.0-SNAPSHOT + 6.2.0 datawave-edge-dictionary-core jar diff --git a/warehouse/edge-model-configuration-core/pom.xml b/warehouse/edge-model-configuration-core/pom.xml index 02c2f1c0c5..b38f3ff77c 100644 --- a/warehouse/edge-model-configuration-core/pom.xml +++ b/warehouse/edge-model-configuration-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.2.0-SNAPSHOT + 6.2.0 datawave-edge-model-configuration-core jar diff --git a/warehouse/index-stats/pom.xml b/warehouse/index-stats/pom.xml index 48dc0d24db..75c17876db 100644 --- a/warehouse/index-stats/pom.xml +++ b/warehouse/index-stats/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.2.0-SNAPSHOT + 6.2.0 datawave-index-stats jar diff --git a/warehouse/ingest-configuration/pom.xml b/warehouse/ingest-configuration/pom.xml index bfa7ae2aef..465c7760ac 100644 --- a/warehouse/ingest-configuration/pom.xml +++ b/warehouse/ingest-configuration/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.2.0-SNAPSHOT + 6.2.0 datawave-ingest-configuration diff --git a/warehouse/ingest-core/pom.xml b/warehouse/ingest-core/pom.xml index c85cd46e7b..0dcb900d61 100644 --- a/warehouse/ingest-core/pom.xml +++ b/warehouse/ingest-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.2.0-SNAPSHOT + 6.2.0 datawave-ingest-core jar diff --git a/warehouse/ingest-csv/pom.xml b/warehouse/ingest-csv/pom.xml index 469c5a3edc..e4afc7471c 100644 --- a/warehouse/ingest-csv/pom.xml +++ b/warehouse/ingest-csv/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.2.0-SNAPSHOT + 6.2.0 datawave-ingest-csv jar diff --git a/warehouse/ingest-json/pom.xml b/warehouse/ingest-json/pom.xml index 071865d3e2..069cba833d 100644 --- a/warehouse/ingest-json/pom.xml +++ b/warehouse/ingest-json/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.2.0-SNAPSHOT + 6.2.0 datawave-ingest-json jar diff --git a/warehouse/ingest-nyctlc/pom.xml b/warehouse/ingest-nyctlc/pom.xml index 615f06435d..8c5549dfa5 100644 --- a/warehouse/ingest-nyctlc/pom.xml +++ b/warehouse/ingest-nyctlc/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.2.0-SNAPSHOT + 6.2.0 datawave-ingest-nyctlc jar diff --git a/warehouse/ingest-scripts/pom.xml b/warehouse/ingest-scripts/pom.xml index f5fa216a2a..a11795780d 100644 --- a/warehouse/ingest-scripts/pom.xml +++ b/warehouse/ingest-scripts/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.2.0-SNAPSHOT + 6.2.0 datawave-ingest-scripts ${project.artifactId} diff --git a/warehouse/ingest-wikipedia/pom.xml b/warehouse/ingest-wikipedia/pom.xml index fa8b597b91..e83b530bed 100644 --- a/warehouse/ingest-wikipedia/pom.xml +++ b/warehouse/ingest-wikipedia/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.2.0-SNAPSHOT + 6.2.0 datawave-ingest-wikipedia jar diff --git a/warehouse/metrics-core/pom.xml b/warehouse/metrics-core/pom.xml index 106b05001a..8abae9d46f 100644 --- a/warehouse/metrics-core/pom.xml +++ b/warehouse/metrics-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.2.0-SNAPSHOT + 6.2.0 datawave-metrics-core jar diff --git a/warehouse/ops-tools/config-compare/pom.xml b/warehouse/ops-tools/config-compare/pom.xml index 83cc229db2..296deaeed7 100644 --- a/warehouse/ops-tools/config-compare/pom.xml +++ 
b/warehouse/ops-tools/config-compare/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-ops-tools-parent - 6.2.0-SNAPSHOT + 6.2.0 datawave-ops-tools-config-compare diff --git a/warehouse/ops-tools/index-validation/pom.xml b/warehouse/ops-tools/index-validation/pom.xml index 822e8f2137..28c1bd2521 100644 --- a/warehouse/ops-tools/index-validation/pom.xml +++ b/warehouse/ops-tools/index-validation/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-ops-tools-parent - 6.2.0-SNAPSHOT + 6.2.0 datawave-ops-tools-index-validation jar diff --git a/warehouse/ops-tools/pom.xml b/warehouse/ops-tools/pom.xml index 7d4c0307b4..72c5a60e2a 100644 --- a/warehouse/ops-tools/pom.xml +++ b/warehouse/ops-tools/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.2.0-SNAPSHOT + 6.2.0 datawave-ops-tools-parent pom diff --git a/warehouse/pom.xml b/warehouse/pom.xml index 8f4e14cdd5..2b3d286ec9 100644 --- a/warehouse/pom.xml +++ b/warehouse/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-parent - 6.2.0-SNAPSHOT + 6.2.0 datawave-warehouse-parent pom diff --git a/warehouse/query-core/pom.xml b/warehouse/query-core/pom.xml index 9051a0c515..6b694f2d3d 100644 --- a/warehouse/query-core/pom.xml +++ b/warehouse/query-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.2.0-SNAPSHOT + 6.2.0 datawave-query-core jar diff --git a/warehouse/regression-testing/pom.xml b/warehouse/regression-testing/pom.xml index 46c31eefdd..23b8a67f24 100644 --- a/warehouse/regression-testing/pom.xml +++ b/warehouse/regression-testing/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.2.0-SNAPSHOT + 6.2.0 datawave-regression-testing ${project.artifactId} diff --git a/web-services/accumulo/pom.xml b/web-services/accumulo/pom.xml index 5c231fe4bb..47f57e2380 100644 --- a/web-services/accumulo/pom.xml +++ b/web-services/accumulo/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.2.0-SNAPSHOT + 6.2.0 datawave-ws-accumulo ejb diff --git a/web-services/atom/pom.xml b/web-services/atom/pom.xml index d376a08f2c..9eda58aad9 100644 --- a/web-services/atom/pom.xml +++ b/web-services/atom/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.2.0-SNAPSHOT + 6.2.0 datawave-ws-atom ejb diff --git a/web-services/cached-results/pom.xml b/web-services/cached-results/pom.xml index 49940b838e..974b90089d 100644 --- a/web-services/cached-results/pom.xml +++ b/web-services/cached-results/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.2.0-SNAPSHOT + 6.2.0 datawave-ws-cached-results ejb diff --git a/web-services/client/pom.xml b/web-services/client/pom.xml index 183ffbc9fb..24b323c503 100644 --- a/web-services/client/pom.xml +++ b/web-services/client/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.2.0-SNAPSHOT + 6.2.0 datawave-ws-client jar diff --git a/web-services/common-util/pom.xml b/web-services/common-util/pom.xml index 727136df2e..90fec5ca2b 100644 --- a/web-services/common-util/pom.xml +++ b/web-services/common-util/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.2.0-SNAPSHOT + 6.2.0 datawave-ws-common-util jar diff --git a/web-services/common/pom.xml b/web-services/common/pom.xml index c76f78a65d..1de9afb833 100644 --- a/web-services/common/pom.xml +++ b/web-services/common/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.2.0-SNAPSHOT + 6.2.0 datawave-ws-common ejb diff --git a/web-services/deploy/application/pom.xml 
b/web-services/deploy/application/pom.xml index 1ea3bf96b6..e33777d8f2 100644 --- a/web-services/deploy/application/pom.xml +++ b/web-services/deploy/application/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-deploy-parent - 6.2.0-SNAPSHOT + 6.2.0 datawave-ws-deploy-application ear diff --git a/web-services/deploy/configuration/pom.xml b/web-services/deploy/configuration/pom.xml index 550641f0cf..ae6603af5c 100644 --- a/web-services/deploy/configuration/pom.xml +++ b/web-services/deploy/configuration/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-deploy-parent - 6.2.0-SNAPSHOT + 6.2.0 datawave-ws-deploy-configuration jar diff --git a/web-services/deploy/docs/pom.xml b/web-services/deploy/docs/pom.xml index fd39ec227f..c00e6b6d0b 100644 --- a/web-services/deploy/docs/pom.xml +++ b/web-services/deploy/docs/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-deploy-parent - 6.2.0-SNAPSHOT + 6.2.0 datawave-ws-deploy-docs war diff --git a/web-services/deploy/pom.xml b/web-services/deploy/pom.xml index a06c2353cc..30846defce 100644 --- a/web-services/deploy/pom.xml +++ b/web-services/deploy/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.2.0-SNAPSHOT + 6.2.0 gov.nsa.datawave.webservices datawave-ws-deploy-parent diff --git a/web-services/deploy/spring-framework-integration/pom.xml b/web-services/deploy/spring-framework-integration/pom.xml index be7c3fba17..00dc15232b 100644 --- a/web-services/deploy/spring-framework-integration/pom.xml +++ b/web-services/deploy/spring-framework-integration/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-deploy-parent - 6.2.0-SNAPSHOT + 6.2.0 spring-framework-integration ${project.artifactId} diff --git a/web-services/dictionary/pom.xml b/web-services/dictionary/pom.xml index cf464ebdb3..82132082dc 100644 --- a/web-services/dictionary/pom.xml +++ b/web-services/dictionary/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.2.0-SNAPSHOT + 6.2.0 datawave-ws-dictionary ejb diff --git a/web-services/examples/client-login/pom.xml b/web-services/examples/client-login/pom.xml index 33b61757aa..0bc1ebffff 100644 --- a/web-services/examples/client-login/pom.xml +++ b/web-services/examples/client-login/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-examples-parent - 6.2.0-SNAPSHOT + 6.2.0 datawave-ws-examples-client-login ejb diff --git a/web-services/examples/http-client/pom.xml b/web-services/examples/http-client/pom.xml index dd881d6bbe..c059a6b738 100644 --- a/web-services/examples/http-client/pom.xml +++ b/web-services/examples/http-client/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-examples-parent - 6.2.0-SNAPSHOT + 6.2.0 datawave-ws-examples-http-client jar diff --git a/web-services/examples/jms-client/pom.xml b/web-services/examples/jms-client/pom.xml index 16b4062827..9ec6dbd465 100644 --- a/web-services/examples/jms-client/pom.xml +++ b/web-services/examples/jms-client/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-examples-parent - 6.2.0-SNAPSHOT + 6.2.0 datawave-ws-examples-jms-client jar diff --git a/web-services/examples/pom.xml b/web-services/examples/pom.xml index b2f96c2db1..926241dc03 100644 --- a/web-services/examples/pom.xml +++ b/web-services/examples/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.2.0-SNAPSHOT + 6.2.0 datawave-ws-examples-parent pom diff --git a/web-services/examples/query-war/pom.xml b/web-services/examples/query-war/pom.xml index 
99df0b58dc..5165aebfae 100644 --- a/web-services/examples/query-war/pom.xml +++ b/web-services/examples/query-war/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-examples-parent - 6.2.0-SNAPSHOT + 6.2.0 datawave-ws-examples-query-war war diff --git a/web-services/map-reduce-embedded/pom.xml b/web-services/map-reduce-embedded/pom.xml index fd28d8a678..6bd1ec3a74 100644 --- a/web-services/map-reduce-embedded/pom.xml +++ b/web-services/map-reduce-embedded/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.2.0-SNAPSHOT + 6.2.0 datawave-ws-map-reduce-embedded jar diff --git a/web-services/map-reduce-status/pom.xml b/web-services/map-reduce-status/pom.xml index 1f6a3cc05f..3c4f6ef930 100644 --- a/web-services/map-reduce-status/pom.xml +++ b/web-services/map-reduce-status/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.2.0-SNAPSHOT + 6.2.0 datawave-ws-map-reduce-status ejb diff --git a/web-services/map-reduce/pom.xml b/web-services/map-reduce/pom.xml index 6fcd98a75f..53d05054f6 100644 --- a/web-services/map-reduce/pom.xml +++ b/web-services/map-reduce/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.2.0-SNAPSHOT + 6.2.0 datawave-ws-map-reduce ejb diff --git a/web-services/model/pom.xml b/web-services/model/pom.xml index 525527b6a4..963186bf28 100644 --- a/web-services/model/pom.xml +++ b/web-services/model/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.2.0-SNAPSHOT + 6.2.0 datawave-ws-model ejb diff --git a/web-services/modification/pom.xml b/web-services/modification/pom.xml index 3a83535b59..7de7fde958 100644 --- a/web-services/modification/pom.xml +++ b/web-services/modification/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.2.0-SNAPSHOT + 6.2.0 datawave-ws-modification ejb diff --git a/web-services/pom.xml b/web-services/pom.xml index 193861d42c..ec000dc9a9 100644 --- a/web-services/pom.xml +++ b/web-services/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-parent - 6.2.0-SNAPSHOT + 6.2.0 gov.nsa.datawave.webservices datawave-ws-parent diff --git a/web-services/query-websocket/pom.xml b/web-services/query-websocket/pom.xml index e6b26792de..28af255c8d 100644 --- a/web-services/query-websocket/pom.xml +++ b/web-services/query-websocket/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.2.0-SNAPSHOT + 6.2.0 datawave-ws-query-websocket war diff --git a/web-services/query/pom.xml b/web-services/query/pom.xml index ba56c57529..e9b63dc53d 100644 --- a/web-services/query/pom.xml +++ b/web-services/query/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.2.0-SNAPSHOT + 6.2.0 datawave-ws-query ejb diff --git a/web-services/rest-api/pom.xml b/web-services/rest-api/pom.xml index b6b42df740..683440566c 100644 --- a/web-services/rest-api/pom.xml +++ b/web-services/rest-api/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.2.0-SNAPSHOT + 6.2.0 datawave-ws-rest-api war diff --git a/web-services/security/pom.xml b/web-services/security/pom.xml index 2c6b9670e9..4f47a84b31 100644 --- a/web-services/security/pom.xml +++ b/web-services/security/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.2.0-SNAPSHOT + 6.2.0 datawave-ws-security ejb diff --git a/web-services/web-root/pom.xml b/web-services/web-root/pom.xml index 8897003cf4..f933ee5838 100644 --- a/web-services/web-root/pom.xml +++ b/web-services/web-root/pom.xml @@ -4,7 +4,7 @@ 
gov.nsa.datawave.webservices datawave-ws-parent - 6.2.0-SNAPSHOT + 6.2.0 datawave-ws-web-root war From 3d22d3644486b9e1cb57510b61f0c1790d80a90c Mon Sep 17 00:00:00 2001 From: hgklohr Date: Fri, 17 Nov 2023 17:46:04 +0000 Subject: [PATCH 21/32] 6.2.1-SNAPSHOT --- common-test/pom.xml | 2 +- core/pom.xml | 2 +- core/utils/pom.xml | 2 +- docs/pom.xml | 2 +- microservices/pom.xml | 2 +- microservices/services/pom.xml | 2 +- microservices/starters/pom.xml | 2 +- pom.xml | 2 +- warehouse/accumulo-extensions/pom.xml | 2 +- warehouse/assemble/datawave/pom.xml | 2 +- warehouse/assemble/pom.xml | 2 +- warehouse/assemble/webservice/pom.xml | 2 +- warehouse/common/pom.xml | 2 +- warehouse/core/pom.xml | 2 +- warehouse/data-dictionary-core/pom.xml | 2 +- warehouse/edge-dictionary-core/pom.xml | 2 +- warehouse/edge-model-configuration-core/pom.xml | 2 +- warehouse/index-stats/pom.xml | 2 +- warehouse/ingest-configuration/pom.xml | 2 +- warehouse/ingest-core/pom.xml | 2 +- warehouse/ingest-csv/pom.xml | 2 +- warehouse/ingest-json/pom.xml | 2 +- warehouse/ingest-nyctlc/pom.xml | 2 +- warehouse/ingest-scripts/pom.xml | 2 +- warehouse/ingest-wikipedia/pom.xml | 2 +- warehouse/metrics-core/pom.xml | 2 +- warehouse/ops-tools/config-compare/pom.xml | 2 +- warehouse/ops-tools/index-validation/pom.xml | 2 +- warehouse/ops-tools/pom.xml | 2 +- warehouse/pom.xml | 2 +- warehouse/query-core/pom.xml | 2 +- warehouse/regression-testing/pom.xml | 2 +- web-services/accumulo/pom.xml | 2 +- web-services/atom/pom.xml | 2 +- web-services/cached-results/pom.xml | 2 +- web-services/client/pom.xml | 2 +- web-services/common-util/pom.xml | 2 +- web-services/common/pom.xml | 2 +- web-services/deploy/application/pom.xml | 2 +- web-services/deploy/configuration/pom.xml | 2 +- web-services/deploy/docs/pom.xml | 2 +- web-services/deploy/pom.xml | 2 +- web-services/deploy/spring-framework-integration/pom.xml | 2 +- web-services/dictionary/pom.xml | 2 +- web-services/examples/client-login/pom.xml | 2 +- web-services/examples/http-client/pom.xml | 2 +- web-services/examples/jms-client/pom.xml | 2 +- web-services/examples/pom.xml | 2 +- web-services/examples/query-war/pom.xml | 2 +- web-services/map-reduce-embedded/pom.xml | 2 +- web-services/map-reduce-status/pom.xml | 2 +- web-services/map-reduce/pom.xml | 2 +- web-services/model/pom.xml | 2 +- web-services/modification/pom.xml | 2 +- web-services/pom.xml | 2 +- web-services/query-websocket/pom.xml | 2 +- web-services/query/pom.xml | 2 +- web-services/rest-api/pom.xml | 2 +- web-services/security/pom.xml | 2 +- web-services/web-root/pom.xml | 2 +- 60 files changed, 60 insertions(+), 60 deletions(-) diff --git a/common-test/pom.xml b/common-test/pom.xml index 88c3a3c3a6..5d2e4d7d70 100644 --- a/common-test/pom.xml +++ b/common-test/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-parent - 6.2.0 + 6.2.1-SNAPSHOT datawave-common-test ${project.artifactId} diff --git a/core/pom.xml b/core/pom.xml index 6d24bbbd36..a0e20860b6 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-parent - 6.2.0 + 6.2.1-SNAPSHOT gov.nsa.datawave.core datawave-core-parent diff --git a/core/utils/pom.xml b/core/utils/pom.xml index 2175f7b953..e0e2ae872b 100644 --- a/core/utils/pom.xml +++ b/core/utils/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.core datawave-core-parent - 6.2.0 + 6.2.1-SNAPSHOT gov.nsa.datawave.core datawave-utils-parent diff --git a/docs/pom.xml b/docs/pom.xml index f48490b7d1..96ee14430b 100644 --- a/docs/pom.xml +++ b/docs/pom.xml @@ -4,7 +4,7 @@ 
gov.nsa.datawave datawave-parent - 6.2.0 + 6.2.1-SNAPSHOT datawave-docs diff --git a/microservices/pom.xml b/microservices/pom.xml index a0555825b2..0fe2441aa4 100644 --- a/microservices/pom.xml +++ b/microservices/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-parent - 6.2.0 + 6.2.1-SNAPSHOT gov.nsa.datawave.microservice datawave-microservice-build-parent diff --git a/microservices/services/pom.xml b/microservices/services/pom.xml index 2ca1136957..e68464b80b 100644 --- a/microservices/services/pom.xml +++ b/microservices/services/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.microservice datawave-microservice-build-parent - 6.2.0 + 6.2.1-SNAPSHOT datawave-microservice-service-build-parent pom diff --git a/microservices/starters/pom.xml b/microservices/starters/pom.xml index 4a0f9f7f6d..935383d83a 100644 --- a/microservices/starters/pom.xml +++ b/microservices/starters/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.microservice datawave-microservice-build-parent - 6.2.0 + 6.2.1-SNAPSHOT datawave-microservice-starter-build-parent pom diff --git a/pom.xml b/pom.xml index a3b4c544b9..9040e5c603 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ 4.0.0 gov.nsa.datawave datawave-parent - 6.2.0 + 6.2.1-SNAPSHOT pom DataWave DataWave is a Java-based ingest and query framework that leverages Apache Accumulo to provide fast, secure access to your data. diff --git a/warehouse/accumulo-extensions/pom.xml b/warehouse/accumulo-extensions/pom.xml index 9483669f51..04e14e0902 100644 --- a/warehouse/accumulo-extensions/pom.xml +++ b/warehouse/accumulo-extensions/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.2.0 + 6.2.1-SNAPSHOT datawave-accumulo-extensions ${project.artifactId} diff --git a/warehouse/assemble/datawave/pom.xml b/warehouse/assemble/datawave/pom.xml index b3f4a867b8..5ea8d8b7b7 100644 --- a/warehouse/assemble/datawave/pom.xml +++ b/warehouse/assemble/datawave/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave assemble-parent - 6.2.0 + 6.2.1-SNAPSHOT assemble-datawave pom diff --git a/warehouse/assemble/pom.xml b/warehouse/assemble/pom.xml index 2bf419a44f..58d7745965 100644 --- a/warehouse/assemble/pom.xml +++ b/warehouse/assemble/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.2.0 + 6.2.1-SNAPSHOT assemble-parent pom diff --git a/warehouse/assemble/webservice/pom.xml b/warehouse/assemble/webservice/pom.xml index e5d55a22c4..09d987414d 100644 --- a/warehouse/assemble/webservice/pom.xml +++ b/warehouse/assemble/webservice/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave assemble-parent - 6.2.0 + 6.2.1-SNAPSHOT assemble-webservice ${project.artifactId} diff --git a/warehouse/common/pom.xml b/warehouse/common/pom.xml index ff4cec2dac..5a4f40a346 100644 --- a/warehouse/common/pom.xml +++ b/warehouse/common/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.2.0 + 6.2.1-SNAPSHOT datawave-common ${project.artifactId} diff --git a/warehouse/core/pom.xml b/warehouse/core/pom.xml index e2f7e35985..f75132369b 100644 --- a/warehouse/core/pom.xml +++ b/warehouse/core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.2.0 + 6.2.1-SNAPSHOT datawave-core jar diff --git a/warehouse/data-dictionary-core/pom.xml b/warehouse/data-dictionary-core/pom.xml index d6b24ba8b2..45c264b4ae 100644 --- a/warehouse/data-dictionary-core/pom.xml +++ b/warehouse/data-dictionary-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.2.0 + 6.2.1-SNAPSHOT datawave-data-dictionary-core jar diff --git a/warehouse/edge-dictionary-core/pom.xml 
b/warehouse/edge-dictionary-core/pom.xml index 745129f200..a3b363e4f7 100644 --- a/warehouse/edge-dictionary-core/pom.xml +++ b/warehouse/edge-dictionary-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.2.0 + 6.2.1-SNAPSHOT datawave-edge-dictionary-core jar diff --git a/warehouse/edge-model-configuration-core/pom.xml b/warehouse/edge-model-configuration-core/pom.xml index b38f3ff77c..0a536060d5 100644 --- a/warehouse/edge-model-configuration-core/pom.xml +++ b/warehouse/edge-model-configuration-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.2.0 + 6.2.1-SNAPSHOT datawave-edge-model-configuration-core jar diff --git a/warehouse/index-stats/pom.xml b/warehouse/index-stats/pom.xml index 75c17876db..e39afde709 100644 --- a/warehouse/index-stats/pom.xml +++ b/warehouse/index-stats/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.2.0 + 6.2.1-SNAPSHOT datawave-index-stats jar diff --git a/warehouse/ingest-configuration/pom.xml b/warehouse/ingest-configuration/pom.xml index 465c7760ac..2a61075c92 100644 --- a/warehouse/ingest-configuration/pom.xml +++ b/warehouse/ingest-configuration/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.2.0 + 6.2.1-SNAPSHOT datawave-ingest-configuration diff --git a/warehouse/ingest-core/pom.xml b/warehouse/ingest-core/pom.xml index 0dcb900d61..7189d00f17 100644 --- a/warehouse/ingest-core/pom.xml +++ b/warehouse/ingest-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.2.0 + 6.2.1-SNAPSHOT datawave-ingest-core jar diff --git a/warehouse/ingest-csv/pom.xml b/warehouse/ingest-csv/pom.xml index e4afc7471c..564edc1bb6 100644 --- a/warehouse/ingest-csv/pom.xml +++ b/warehouse/ingest-csv/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.2.0 + 6.2.1-SNAPSHOT datawave-ingest-csv jar diff --git a/warehouse/ingest-json/pom.xml b/warehouse/ingest-json/pom.xml index 069cba833d..90d5421aea 100644 --- a/warehouse/ingest-json/pom.xml +++ b/warehouse/ingest-json/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.2.0 + 6.2.1-SNAPSHOT datawave-ingest-json jar diff --git a/warehouse/ingest-nyctlc/pom.xml b/warehouse/ingest-nyctlc/pom.xml index 8c5549dfa5..09eb4ef75b 100644 --- a/warehouse/ingest-nyctlc/pom.xml +++ b/warehouse/ingest-nyctlc/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.2.0 + 6.2.1-SNAPSHOT datawave-ingest-nyctlc jar diff --git a/warehouse/ingest-scripts/pom.xml b/warehouse/ingest-scripts/pom.xml index a11795780d..0464f35447 100644 --- a/warehouse/ingest-scripts/pom.xml +++ b/warehouse/ingest-scripts/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.2.0 + 6.2.1-SNAPSHOT datawave-ingest-scripts ${project.artifactId} diff --git a/warehouse/ingest-wikipedia/pom.xml b/warehouse/ingest-wikipedia/pom.xml index e83b530bed..f85377862f 100644 --- a/warehouse/ingest-wikipedia/pom.xml +++ b/warehouse/ingest-wikipedia/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.2.0 + 6.2.1-SNAPSHOT datawave-ingest-wikipedia jar diff --git a/warehouse/metrics-core/pom.xml b/warehouse/metrics-core/pom.xml index 8abae9d46f..892840a2b9 100644 --- a/warehouse/metrics-core/pom.xml +++ b/warehouse/metrics-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.2.0 + 6.2.1-SNAPSHOT datawave-metrics-core jar diff --git a/warehouse/ops-tools/config-compare/pom.xml b/warehouse/ops-tools/config-compare/pom.xml index 296deaeed7..b494bdbc4e 100644 --- 
a/warehouse/ops-tools/config-compare/pom.xml +++ b/warehouse/ops-tools/config-compare/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-ops-tools-parent - 6.2.0 + 6.2.1-SNAPSHOT datawave-ops-tools-config-compare diff --git a/warehouse/ops-tools/index-validation/pom.xml b/warehouse/ops-tools/index-validation/pom.xml index 28c1bd2521..f946c6e8d4 100644 --- a/warehouse/ops-tools/index-validation/pom.xml +++ b/warehouse/ops-tools/index-validation/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-ops-tools-parent - 6.2.0 + 6.2.1-SNAPSHOT datawave-ops-tools-index-validation jar diff --git a/warehouse/ops-tools/pom.xml b/warehouse/ops-tools/pom.xml index 72c5a60e2a..773f072fb2 100644 --- a/warehouse/ops-tools/pom.xml +++ b/warehouse/ops-tools/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.2.0 + 6.2.1-SNAPSHOT datawave-ops-tools-parent pom diff --git a/warehouse/pom.xml b/warehouse/pom.xml index 2b3d286ec9..bc8678aa10 100644 --- a/warehouse/pom.xml +++ b/warehouse/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-parent - 6.2.0 + 6.2.1-SNAPSHOT datawave-warehouse-parent pom diff --git a/warehouse/query-core/pom.xml b/warehouse/query-core/pom.xml index 6b694f2d3d..286e7ce971 100644 --- a/warehouse/query-core/pom.xml +++ b/warehouse/query-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.2.0 + 6.2.1-SNAPSHOT datawave-query-core jar diff --git a/warehouse/regression-testing/pom.xml b/warehouse/regression-testing/pom.xml index 23b8a67f24..b3d710e2ce 100644 --- a/warehouse/regression-testing/pom.xml +++ b/warehouse/regression-testing/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 6.2.0 + 6.2.1-SNAPSHOT datawave-regression-testing ${project.artifactId} diff --git a/web-services/accumulo/pom.xml b/web-services/accumulo/pom.xml index 47f57e2380..34ef8d8f7c 100644 --- a/web-services/accumulo/pom.xml +++ b/web-services/accumulo/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.2.0 + 6.2.1-SNAPSHOT datawave-ws-accumulo ejb diff --git a/web-services/atom/pom.xml b/web-services/atom/pom.xml index 9eda58aad9..9c5e669a1d 100644 --- a/web-services/atom/pom.xml +++ b/web-services/atom/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.2.0 + 6.2.1-SNAPSHOT datawave-ws-atom ejb diff --git a/web-services/cached-results/pom.xml b/web-services/cached-results/pom.xml index 974b90089d..216a650784 100644 --- a/web-services/cached-results/pom.xml +++ b/web-services/cached-results/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.2.0 + 6.2.1-SNAPSHOT datawave-ws-cached-results ejb diff --git a/web-services/client/pom.xml b/web-services/client/pom.xml index 24b323c503..700f605d49 100644 --- a/web-services/client/pom.xml +++ b/web-services/client/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.2.0 + 6.2.1-SNAPSHOT datawave-ws-client jar diff --git a/web-services/common-util/pom.xml b/web-services/common-util/pom.xml index 90fec5ca2b..8956f6e488 100644 --- a/web-services/common-util/pom.xml +++ b/web-services/common-util/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.2.0 + 6.2.1-SNAPSHOT datawave-ws-common-util jar diff --git a/web-services/common/pom.xml b/web-services/common/pom.xml index 1de9afb833..e890cbe139 100644 --- a/web-services/common/pom.xml +++ b/web-services/common/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.2.0 + 6.2.1-SNAPSHOT datawave-ws-common ejb diff --git 
a/web-services/deploy/application/pom.xml b/web-services/deploy/application/pom.xml index e33777d8f2..943756bf7d 100644 --- a/web-services/deploy/application/pom.xml +++ b/web-services/deploy/application/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-deploy-parent - 6.2.0 + 6.2.1-SNAPSHOT datawave-ws-deploy-application ear diff --git a/web-services/deploy/configuration/pom.xml b/web-services/deploy/configuration/pom.xml index ae6603af5c..09f2528da9 100644 --- a/web-services/deploy/configuration/pom.xml +++ b/web-services/deploy/configuration/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-deploy-parent - 6.2.0 + 6.2.1-SNAPSHOT datawave-ws-deploy-configuration jar diff --git a/web-services/deploy/docs/pom.xml b/web-services/deploy/docs/pom.xml index c00e6b6d0b..61868e7643 100644 --- a/web-services/deploy/docs/pom.xml +++ b/web-services/deploy/docs/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-deploy-parent - 6.2.0 + 6.2.1-SNAPSHOT datawave-ws-deploy-docs war diff --git a/web-services/deploy/pom.xml b/web-services/deploy/pom.xml index 30846defce..ee7eae8ff8 100644 --- a/web-services/deploy/pom.xml +++ b/web-services/deploy/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.2.0 + 6.2.1-SNAPSHOT gov.nsa.datawave.webservices datawave-ws-deploy-parent diff --git a/web-services/deploy/spring-framework-integration/pom.xml b/web-services/deploy/spring-framework-integration/pom.xml index 00dc15232b..0a6b4252fb 100644 --- a/web-services/deploy/spring-framework-integration/pom.xml +++ b/web-services/deploy/spring-framework-integration/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-deploy-parent - 6.2.0 + 6.2.1-SNAPSHOT spring-framework-integration ${project.artifactId} diff --git a/web-services/dictionary/pom.xml b/web-services/dictionary/pom.xml index 82132082dc..0fd20abc1c 100644 --- a/web-services/dictionary/pom.xml +++ b/web-services/dictionary/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.2.0 + 6.2.1-SNAPSHOT datawave-ws-dictionary ejb diff --git a/web-services/examples/client-login/pom.xml b/web-services/examples/client-login/pom.xml index 0bc1ebffff..f41c1b97ff 100644 --- a/web-services/examples/client-login/pom.xml +++ b/web-services/examples/client-login/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-examples-parent - 6.2.0 + 6.2.1-SNAPSHOT datawave-ws-examples-client-login ejb diff --git a/web-services/examples/http-client/pom.xml b/web-services/examples/http-client/pom.xml index c059a6b738..fb4c77f34a 100644 --- a/web-services/examples/http-client/pom.xml +++ b/web-services/examples/http-client/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-examples-parent - 6.2.0 + 6.2.1-SNAPSHOT datawave-ws-examples-http-client jar diff --git a/web-services/examples/jms-client/pom.xml b/web-services/examples/jms-client/pom.xml index 9ec6dbd465..dcab91e8cc 100644 --- a/web-services/examples/jms-client/pom.xml +++ b/web-services/examples/jms-client/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-examples-parent - 6.2.0 + 6.2.1-SNAPSHOT datawave-ws-examples-jms-client jar diff --git a/web-services/examples/pom.xml b/web-services/examples/pom.xml index 926241dc03..9a07c352fb 100644 --- a/web-services/examples/pom.xml +++ b/web-services/examples/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.2.0 + 6.2.1-SNAPSHOT datawave-ws-examples-parent pom diff --git a/web-services/examples/query-war/pom.xml 
b/web-services/examples/query-war/pom.xml index 5165aebfae..aafe41c1de 100644 --- a/web-services/examples/query-war/pom.xml +++ b/web-services/examples/query-war/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-examples-parent - 6.2.0 + 6.2.1-SNAPSHOT datawave-ws-examples-query-war war diff --git a/web-services/map-reduce-embedded/pom.xml b/web-services/map-reduce-embedded/pom.xml index 6bd1ec3a74..f9cb2cbe36 100644 --- a/web-services/map-reduce-embedded/pom.xml +++ b/web-services/map-reduce-embedded/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.2.0 + 6.2.1-SNAPSHOT datawave-ws-map-reduce-embedded jar diff --git a/web-services/map-reduce-status/pom.xml b/web-services/map-reduce-status/pom.xml index 3c4f6ef930..43c5d9c2ff 100644 --- a/web-services/map-reduce-status/pom.xml +++ b/web-services/map-reduce-status/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.2.0 + 6.2.1-SNAPSHOT datawave-ws-map-reduce-status ejb diff --git a/web-services/map-reduce/pom.xml b/web-services/map-reduce/pom.xml index 53d05054f6..794539ddcc 100644 --- a/web-services/map-reduce/pom.xml +++ b/web-services/map-reduce/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.2.0 + 6.2.1-SNAPSHOT datawave-ws-map-reduce ejb diff --git a/web-services/model/pom.xml b/web-services/model/pom.xml index 963186bf28..5cfb272ed9 100644 --- a/web-services/model/pom.xml +++ b/web-services/model/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.2.0 + 6.2.1-SNAPSHOT datawave-ws-model ejb diff --git a/web-services/modification/pom.xml b/web-services/modification/pom.xml index 7de7fde958..769734e335 100644 --- a/web-services/modification/pom.xml +++ b/web-services/modification/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.2.0 + 6.2.1-SNAPSHOT datawave-ws-modification ejb diff --git a/web-services/pom.xml b/web-services/pom.xml index ec000dc9a9..c466dbd9ff 100644 --- a/web-services/pom.xml +++ b/web-services/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-parent - 6.2.0 + 6.2.1-SNAPSHOT gov.nsa.datawave.webservices datawave-ws-parent diff --git a/web-services/query-websocket/pom.xml b/web-services/query-websocket/pom.xml index 28af255c8d..196d9154b7 100644 --- a/web-services/query-websocket/pom.xml +++ b/web-services/query-websocket/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.2.0 + 6.2.1-SNAPSHOT datawave-ws-query-websocket war diff --git a/web-services/query/pom.xml b/web-services/query/pom.xml index e9b63dc53d..433777a10d 100644 --- a/web-services/query/pom.xml +++ b/web-services/query/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.2.0 + 6.2.1-SNAPSHOT datawave-ws-query ejb diff --git a/web-services/rest-api/pom.xml b/web-services/rest-api/pom.xml index 683440566c..24a7cf1702 100644 --- a/web-services/rest-api/pom.xml +++ b/web-services/rest-api/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.2.0 + 6.2.1-SNAPSHOT datawave-ws-rest-api war diff --git a/web-services/security/pom.xml b/web-services/security/pom.xml index 4f47a84b31..6a1043be86 100644 --- a/web-services/security/pom.xml +++ b/web-services/security/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.2.0 + 6.2.1-SNAPSHOT datawave-ws-security ejb diff --git a/web-services/web-root/pom.xml b/web-services/web-root/pom.xml index f933ee5838..435e9f2f5e 100644 --- a/web-services/web-root/pom.xml +++ 
b/web-services/web-root/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 6.2.0 + 6.2.1-SNAPSHOT datawave-ws-web-root war From a54fe15fdc8bddc361622696753f6b2a1d32d599 Mon Sep 17 00:00:00 2001 From: Whitney O'Meara Date: Mon, 20 Nov 2023 19:19:36 +0000 Subject: [PATCH 22/32] Updated the type-utils and query metric submodules --- core/utils/type-utils | 2 +- microservices/services/query-metric | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/core/utils/type-utils b/core/utils/type-utils index 958c15f54a..3e9628e86e 160000 --- a/core/utils/type-utils +++ b/core/utils/type-utils @@ -1 +1 @@ -Subproject commit 958c15f54a6b5dbc627e3b2599c7e43f6446276d +Subproject commit 3e9628e86e0d02ebaa7374eb4eba57856523a221 diff --git a/microservices/services/query-metric b/microservices/services/query-metric index a6ded67a22..f388ed02c0 160000 --- a/microservices/services/query-metric +++ b/microservices/services/query-metric @@ -1 +1 @@ -Subproject commit a6ded67a22acb5edadc0502ec75ab9dc77aaf4f8 +Subproject commit f388ed02c0b8fd3fc101bbd7879c50406a234247 From 516e77b666c97f8cd51d7e4b9f230e244b82e6e3 Mon Sep 17 00:00:00 2001 From: "P.A" Date: Tue, 21 Nov 2023 15:10:09 -0500 Subject: [PATCH 23/32] Add option for non-existent params (#2100) * Adding in param for ignoring non-existent fields if needed * Adding setting in Query Logic * Updating unit test for param * Adding unit test for option flag * Adding test to ensure path is influenced by option * Updating formatting * Updating tests to get comparisons accurate, additional test * Adjusting fix for config test params * Adding logging for param skip * Updating unit tests * Adjusting typo in method sig --------- Co-authored-by: pcagbu --- .../java/datawave/query/QueryParameters.java | 2 + .../query/config/ShardQueryConfiguration.java | 10 ++++ .../query/planner/DefaultQueryPlanner.java | 4 +- .../query/tables/ShardQueryLogic.java | 5 ++ .../java/datawave/query/MiscQueryTest.java | 51 +++++++++++++++++++ .../config/ShardQueryConfigurationTest.java | 2 + 6 files changed, 73 insertions(+), 1 deletion(-) diff --git a/warehouse/query-core/src/main/java/datawave/query/QueryParameters.java b/warehouse/query-core/src/main/java/datawave/query/QueryParameters.java index 8a2ce76e09..81e76e2fdf 100644 --- a/warehouse/query-core/src/main/java/datawave/query/QueryParameters.java +++ b/warehouse/query-core/src/main/java/datawave/query/QueryParameters.java @@ -194,6 +194,8 @@ public class QueryParameters { public static final String DATE_INDEX_TIME_TRAVEL = "date.index.time.travel"; + public static final String IGNORE_NONEXISTENT_FIELDS = "ignore.nonexistent.fields"; + /** * Used to specify a SHARDS_AND_DAYS hint within the options function. */ diff --git a/warehouse/query-core/src/main/java/datawave/query/config/ShardQueryConfiguration.java b/warehouse/query-core/src/main/java/datawave/query/config/ShardQueryConfiguration.java index 2ddbea1668..300e5761e7 100644 --- a/warehouse/query-core/src/main/java/datawave/query/config/ShardQueryConfiguration.java +++ b/warehouse/query-core/src/main/java/datawave/query/config/ShardQueryConfiguration.java @@ -267,6 +267,7 @@ public class ShardQueryConfiguration extends GenericQueryConfiguration implement private String limitFieldsField = null; private boolean hitList = false; private boolean dateIndexTimeTravel = false; + private boolean ignoreNonExistentFields = false; // Cap (or fail if failOutsideValidDateRange) the begin date with this value (subtracted from Now). 
0 or less disables this feature. private long beginDateCap = -1; private boolean failOutsideValidDateRange = true; @@ -511,6 +512,7 @@ public ShardQueryConfiguration(ShardQueryConfiguration other) { this.setIndexTableName(other.getIndexTableName()); this.setReverseIndexTableName(other.getReverseIndexTableName()); this.setMetadataTableName(other.getMetadataTableName()); + this.setIgnoreNonExistentFields(other.getIgnoreNonExistentFields()); this.setDateIndexTableName(other.getDateIndexTableName()); this.setIndexStatsTableName(other.getIndexStatsTableName()); this.setDefaultDateTypeName(other.getDefaultDateTypeName()); @@ -1648,6 +1650,14 @@ public void setDateIndexTimeTravel(boolean dateIndexTimeTravel) { this.dateIndexTimeTravel = dateIndexTimeTravel; } + public boolean getIgnoreNonExistentFields() { + return ignoreNonExistentFields; + } + + public void setIgnoreNonExistentFields(boolean ignoreNonExistentFields) { + this.ignoreNonExistentFields = ignoreNonExistentFields; + } + public long getBeginDateCap() { return beginDateCap; } diff --git a/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java b/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java index a37e1a46f2..bf1b6d5455 100644 --- a/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java +++ b/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java @@ -857,8 +857,10 @@ protected ASTJexlScript processTree(final ASTJexlScript originalQueryTree, Shard config.setQueryTree(timedReduce(timers, "Reduce Query After ANYFIELD Expansions", config.getQueryTree())); } - if (!disableTestNonExistentFields) { + if (!disableTestNonExistentFields && (!config.getIgnoreNonExistentFields())) { timedTestForNonExistentFields(timers, config.getQueryTree(), config, metadataHelper, queryModel, settings); + } else { + log.debug("Skipping check for nonExistentFields.."); } // apply the node transform rules diff --git a/warehouse/query-core/src/main/java/datawave/query/tables/ShardQueryLogic.java b/warehouse/query-core/src/main/java/datawave/query/tables/ShardQueryLogic.java index 5375ffafad..fec34ca6d9 100644 --- a/warehouse/query-core/src/main/java/datawave/query/tables/ShardQueryLogic.java +++ b/warehouse/query-core/src/main/java/datawave/query/tables/ShardQueryLogic.java @@ -953,6 +953,11 @@ protected void loadQueryParameters(ShardQueryConfiguration config, Query setting this.setModelName(parameterModelName); } + String ignoreNonExist = settings.findParameter(QueryParameters.IGNORE_NONEXISTENT_FIELDS).getParameterValue().trim(); + if (StringUtils.isNotBlank(ignoreNonExist)) { + config.setIgnoreNonExistentFields(Boolean.valueOf(ignoreNonExist)); + } + config.setModelName(this.getModelName()); String parameterModelTableName = settings.findParameter(QueryParameters.PARAMETER_MODEL_TABLE_NAME).getParameterValue().trim(); diff --git a/warehouse/query-core/src/test/java/datawave/query/MiscQueryTest.java b/warehouse/query-core/src/test/java/datawave/query/MiscQueryTest.java index f2b16236f6..278330e601 100644 --- a/warehouse/query-core/src/test/java/datawave/query/MiscQueryTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/MiscQueryTest.java @@ -28,6 +28,7 @@ import datawave.query.attributes.Document; import datawave.query.exceptions.DatawaveFatalQueryException; import datawave.query.exceptions.FullTableScansDisallowedException; +import datawave.query.exceptions.InvalidQueryException; import 
datawave.query.testframework.AbstractFunctionalQuery; import datawave.query.testframework.AccumuloSetup; import datawave.query.testframework.BaseShardIdRange; @@ -96,6 +97,56 @@ public void testEventThreshold() throws Exception { runTest(query, expect); } + @Test(expected = InvalidQueryException.class) + public void testFieldIgnoreParam1() throws Exception { + log.info("------ testFieldIgnoreParam1 ------"); + // setting event per day does not alter results + this.logic.setEventPerDayThreshold(1); + String phrase = RE_OP + "'.*a'" + "&& FOO == bar2"; + String query = Constants.ANY_FIELD + phrase + "&& FOO == bar2"; + String expect = this.dataManager.convertAnyField(phrase); + + Map options = new HashMap<>(); + + // this will throw an exception due to the nonexistent fields not being ignored. + options.put(QueryParameters.IGNORE_NONEXISTENT_FIELDS, "false"); + + runTest(query, expect, options); + } + + @Test + public void testFieldIgnoreParam2() throws Exception { + log.info("------ testFieldIgnoreParam2 ------"); + // setting event per day does not alter results + this.logic.setEventPerDayThreshold(1); + String phrase = RE_OP + "'.*a'" + "&& FOO == bar2"; + String query = Constants.ANY_FIELD + phrase + "&& FOO == bar2"; + String expect = this.dataManager.convertAnyField(phrase); + + Map options = new HashMap<>(); + + // this should allow the query to run successfully. + options.put(QueryParameters.IGNORE_NONEXISTENT_FIELDS, "true"); + + runTest(query, expect, options); + } + + @Test + public void testFieldIgnoreParam3() throws Exception { + log.info("------ testFieldIgnoreParam3 ------"); + // setting event per day does not alter results + this.logic.setEventPerDayThreshold(1); + String phrase = RE_OP + "'.*a' && STATE == 'sta'"; + String query = Constants.ANY_FIELD + phrase + "&& STATE == 'sta'"; + String expect = this.dataManager.convertAnyField(phrase); + + Map options = new HashMap<>(); + + options.put(QueryParameters.IGNORE_NONEXISTENT_FIELDS, "false"); + + runTest(query, expect, options); + } + @Test public void testShardThreshold() throws Exception { log.info("------ testShardThreshold ------"); diff --git a/warehouse/query-core/src/test/java/datawave/query/config/ShardQueryConfigurationTest.java b/warehouse/query-core/src/test/java/datawave/query/config/ShardQueryConfigurationTest.java index cf4206c22e..7bff28c5d0 100644 --- a/warehouse/query-core/src/test/java/datawave/query/config/ShardQueryConfigurationTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/config/ShardQueryConfigurationTest.java @@ -123,6 +123,8 @@ public void setUp() throws Exception { updatedValues.put("maxIndexScanTimeMillis", 100000L); defaultValues.put("parseTldUids", false); updatedValues.put("parseTldUids", true); + defaultValues.put("ignoreNonExistentFields", false); + updatedValues.put("ignoreNonExistentFields", true); defaultValues.put("collapseUids", false); updatedValues.put("collapseUids", true); defaultValues.put("collapseUidsThreshold", -1); From 5f1e35c7512ae7df7e3510750b83d9af95ddd95c Mon Sep 17 00:00:00 2001 From: Moriarty <22225248+apmoriarty@users.noreply.github.com> Date: Wed, 22 Nov 2023 08:20:06 -0500 Subject: [PATCH 24/32] removed no-op 'optimize' config option from maven-compiler-pugin and updated deprecated 'destDir' property with 'outputDirectory' in maven-javadoc-plugin (#2150) --- pom.xml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 1cd6c2bf12..322c36ea63 100644 --- a/pom.xml +++ b/pom.xml @@ -1396,7 +1396,6 @@ 3.8.1 UTF-8 - 
true true true ${maven.compiler.source} @@ -1779,7 +1778,7 @@ package UTF-8 - ${project.build.directory}/apidocs + ${project.build.directory}/apidocs -J-Xmx768m From 6d6ed09d73accdb60084b7500f6e32b3ce145a71 Mon Sep 17 00:00:00 2001 From: Ivan Bella Date: Wed, 22 Nov 2023 15:59:35 +0000 Subject: [PATCH 25/32] Disable maven cache when using datawave-quickstart --- contrib/datawave-quickstart/bin/services/datawave/bootstrap.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/datawave-quickstart/bin/services/datawave/bootstrap.sh b/contrib/datawave-quickstart/bin/services/datawave/bootstrap.sh index 2c2b23c5e4..10bdde7766 100644 --- a/contrib/datawave-quickstart/bin/services/datawave/bootstrap.sh +++ b/contrib/datawave-quickstart/bin/services/datawave/bootstrap.sh @@ -30,7 +30,7 @@ source "${DW_DATAWAVE_SERVICE_DIR}/bootstrap-user.sh" DW_DATAWAVE_BUILD_PROFILE=${DW_DATAWAVE_BUILD_PROFILE:-dev} # Maven command -DW_DATAWAVE_BUILD_COMMAND="${DW_DATAWAVE_BUILD_COMMAND:-mvn -P${DW_DATAWAVE_BUILD_PROFILE} -Ddeploy -Dtar -Ddist -DskipTests clean package --builder smart -T1.0C}" +DW_DATAWAVE_BUILD_COMMAND="${DW_DATAWAVE_BUILD_COMMAND:-mvn -P${DW_DATAWAVE_BUILD_PROFILE} -Ddeploy -Dtar -Ddist -DskipTests -Dmaven.build.cache.enabled=false clean package --builder smart -T1.0C}" # Home of any temp data and *.properties file overrides for this instance of DataWave From ef80c2f057c1c5bca87cd2c78c1f6864fd54b281 Mon Sep 17 00:00:00 2001 From: Whitney O'Meara Date: Wed, 22 Nov 2023 19:15:57 +0000 Subject: [PATCH 26/32] Updated the metadata-utils and query metric submodules --- core/utils/metadata-utils | 2 +- microservices/services/query-metric | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/core/utils/metadata-utils b/core/utils/metadata-utils index b2d97900b4..d6e89c86f0 160000 --- a/core/utils/metadata-utils +++ b/core/utils/metadata-utils @@ -1 +1 @@ -Subproject commit b2d97900b4341f06e5e4dd631e57b23b6318b485 +Subproject commit d6e89c86f0dbe2059abbf0fcf1ac1959e1d4f6a7 diff --git a/microservices/services/query-metric b/microservices/services/query-metric index f388ed02c0..aa8b6a1c19 160000 --- a/microservices/services/query-metric +++ b/microservices/services/query-metric @@ -1 +1 @@ -Subproject commit f388ed02c0b8fd3fc101bbd7879c50406a234247 +Subproject commit aa8b6a1c191070eef4bf165395371f536dd28235 From 543e30fddd4476a367aea227c8c20c06acbbbd42 Mon Sep 17 00:00:00 2001 From: Whitney O'Meara Date: Wed, 22 Nov 2023 19:40:22 +0000 Subject: [PATCH 27/32] Updated metric service and client submodules --- microservices/services/query-metric | 2 +- microservices/starters/query-metric | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/microservices/services/query-metric b/microservices/services/query-metric index aa8b6a1c19..1ec83a314d 160000 --- a/microservices/services/query-metric +++ b/microservices/services/query-metric @@ -1 +1 @@ -Subproject commit aa8b6a1c191070eef4bf165395371f536dd28235 +Subproject commit 1ec83a314d1fddab31abbba74e5cbfa535cf2a70 diff --git a/microservices/starters/query-metric b/microservices/starters/query-metric index 17db2621f6..47ea1105de 160000 --- a/microservices/starters/query-metric +++ b/microservices/starters/query-metric @@ -1 +1 @@ -Subproject commit 17db2621f6f62d347cffdbe7b1ed3195b010a0ca +Subproject commit 47ea1105de7ba323162235f1a217d3c5d79a6b5b From 74a1434db94ca493218d1a8372add895f3a66ccb Mon Sep 17 00:00:00 2001 From: Whitney O'Meara Date: Wed, 22 Nov 2023 22:27:15 +0000 Subject: [PATCH 28/32] updated audit 
service, and query metric service/starter submodules --- microservices/services/audit | 2 +- microservices/services/query-metric | 2 +- microservices/starters/query-metric | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/microservices/services/audit b/microservices/services/audit index ea0f0cefd9..869c660896 160000 --- a/microservices/services/audit +++ b/microservices/services/audit @@ -1 +1 @@ -Subproject commit ea0f0cefd9974cc7ec6efe359d0005196b3eb797 +Subproject commit 869c66089671d29547caa9929ff548099160d640 diff --git a/microservices/services/query-metric b/microservices/services/query-metric index 1ec83a314d..c52497fc55 160000 --- a/microservices/services/query-metric +++ b/microservices/services/query-metric @@ -1 +1 @@ -Subproject commit 1ec83a314d1fddab31abbba74e5cbfa535cf2a70 +Subproject commit c52497fc553a4b4f355b39d77806fff9da70dcde diff --git a/microservices/starters/query-metric b/microservices/starters/query-metric index 47ea1105de..8ed73e114c 160000 --- a/microservices/starters/query-metric +++ b/microservices/starters/query-metric @@ -1 +1 @@ -Subproject commit 47ea1105de7ba323162235f1a217d3c5d79a6b5b +Subproject commit 8ed73e114c45816dd23d61c8a7cdba9f3be6f15c From d3878612d7d72d4d5312a4107597bc5d7c857d83 Mon Sep 17 00:00:00 2001 From: Whitney O'Meara Date: Wed, 22 Nov 2023 22:28:18 +0000 Subject: [PATCH 29/32] updated metric service submodule --- microservices/services/query-metric | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/microservices/services/query-metric b/microservices/services/query-metric index c52497fc55..81eac0d923 160000 --- a/microservices/services/query-metric +++ b/microservices/services/query-metric @@ -1 +1 @@ -Subproject commit c52497fc553a4b4f355b39d77806fff9da70dcde +Subproject commit 81eac0d9237419cf25802806ab501588bce56865 From f2f7e194de631ddcd806d7e3c2c86d9a17ddf8b9 Mon Sep 17 00:00:00 2001 From: Ivan Bella Date: Fri, 24 Nov 2023 14:54:09 +0000 Subject: [PATCH 30/32] Updated to avoid some formatting issues --- .github/workflows/tests.yml | 6 +++--- microservices/services/audit | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index cade7cc729..9dc379b54b 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -32,7 +32,7 @@ jobs: with: distribution: ${{env.JAVA_DISTRIBUTION}} java-version: ${{env.JAVA_VERSION}} - maven-version: 3.8.7 + maven-version: 3.9.5 cache: 'maven' - name: Format code run: | @@ -61,7 +61,7 @@ jobs: with: distribution: ${{env.JAVA_DISTRIBUTION}} java-version: ${{env.JAVA_VERSION}} - maven-version: 3.8.7 + maven-version: 3.9.5 cache: 'maven' - name: Build and Run Unit Tests run: | @@ -118,7 +118,7 @@ jobs: with: distribution: ${{env.JAVA_DISTRIBUTION}} java-version: ${{env.JAVA_VERSION}} - maven-version: 3.8.7 + maven-version: 3.9.5 cache: 'maven' # Allow us to use the "--squash" option below - name: Turn on Docker experimental features and move Docker data root diff --git a/microservices/services/audit b/microservices/services/audit index 869c660896..f66cc4a9ab 160000 --- a/microservices/services/audit +++ b/microservices/services/audit @@ -1 +1 @@ -Subproject commit 869c66089671d29547caa9929ff548099160d640 +Subproject commit f66cc4a9abe75d4d95aeadf2e58097c7739c83c0 From c5cdbd19c293c701fe55f3158f3f06dd8df75441 Mon Sep 17 00:00:00 2001 From: Ivan Bella Date: Fri, 24 Nov 2023 15:11:06 +0000 Subject: [PATCH 31/32] Updated to try and avoid github test issues --- .github/workflows/tests.yml 
| 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 9dc379b54b..3b89a65e34 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -36,7 +36,7 @@ jobs: cache: 'maven' - name: Format code run: | - mvn -V -B -e -ntp "-Dstyle.color=always" clean formatter:format sortpom:sort impsort:sort -Pautoformat + mvn -V -B -e -ntp "-Dstyle.color=always" clean formatter:format sortpom:sort impsort:sort -Dmaven.build.cache.enabled=false -Pautoformat git status git diff-index --quiet HEAD || (echo "Modified files found. Creating new commit with formatting fixes" && echo "diffs_found=true" >> "$GITHUB_ENV") - name: Commit Changes @@ -65,7 +65,7 @@ jobs: cache: 'maven' - name: Build and Run Unit Tests run: | - RUN_TESTS="mvn -V -B -e -ntp "-Dstyle.color=always" -Pdev,examples,assemble,spotbugs -Ddeploy -Ddist -T1C clean verify" + RUN_TESTS="mvn -V -B -e -ntp "-Dstyle.color=always" -Pdev,examples,assemble,spotbugs -Dmaven.build.cache.enabled=false -Ddeploy -Ddist -T1C clean verify" $RUN_TESTS \ || { echo "***** TESTS FAILED. Attempting retry."; $RUN_TESTS; } \ || { echo "***** TESTS FAILED. Attempting final retry."; $RUN_TESTS; } @@ -98,11 +98,11 @@ jobs: # ${{ runner.os }}-maven- # - name: Build Project # run: | - # BUILD="mvn -V -B -e -Pdev,examples,assemble,spotbugs -DskipServices -Ddeploy -Ddist -T1C -pl "-:config-service" clean install -DskipTests" + # BUILD="mvn -V -B -e -Pdev,examples,assemble,spotbugs -Dmaven.build.cache.enabled=false -DskipServices -Ddeploy -Ddist -T1C -pl "-:config-service" clean install -DskipTests" # $BUILD # - name: Run Microservice Unit Tests # run: | - # RUN_TESTS="mvn -V -B -e verify" + # RUN_TESTS="mvn -V -B -e -Dmaven.build.cache.enabled=false verify" # cd microservices # $RUN_TESTS \ # || { echo "***** TESTS FAILED. Attempting retry."; $RUN_TESTS; } \ @@ -133,10 +133,10 @@ jobs: # Builds the quickstart docker image and run the query tests - name: Quickstart Query Tests env: - DW_DATAWAVE_BUILD_COMMAND: "mvn -B -V -e -ntp -Dstyle.color=always -Pdev -Ddeploy -Dtar -DskipTests clean package" + DW_DATAWAVE_BUILD_COMMAND: "mvn -B -V -e -ntp -Dstyle.color=always -Dmaven.build.cache.enabled=false -Pdev -Ddeploy -Dtar -DskipTests clean package" DOCKER_BUILD_OPTS: "--squash --force-rm" run: | - TAG=$(mvn -q -N -Dexec.executable='echo' -Dexec.args='${project.version}' exec:exec) + TAG=$(mvn -q -N -Dmaven.build.cache.enabled=false -Dexec.executable='echo' -Dexec.args='${project.version}' exec:exec) contrib/datawave-quickstart/docker/docker-build.sh ${TAG} --docker-opts "${DOCKER_BUILD_OPTS}" # Here's an example of how you'd deploy the image to the github package registry. 
@@ -150,7 +150,7 @@ jobs: # IMAGE_NAME: "datawave/quickstart" # run: | # # Set up env vars - # TAG=$(mvn -q -N -Dexec.executable='echo' -Dexec.args='${project.version}' exec:exec) + # TAG=$(mvn -q -N -Dmaven.build.cache.enabled=false -Dexec.executable='echo' -Dexec.args='${project.version}' exec:exec) # REMOTE_IMAGE_NAME="${IMAGE_REGISTRY}/${IMAGE_USERNAME}/${IMAGE_NAME}" # # Log in to the package registry # echo ${{ secrets.GITHUB_TOKEN }} | docker login docker.pkg.github.com --username ${GITHUB_ACTOR} --password-stdin From 1e9717913960004f4357c17326f8f046e6f977d8 Mon Sep 17 00:00:00 2001 From: Ivan Bella Date: Fri, 24 Nov 2023 16:01:31 +0000 Subject: [PATCH 32/32] Set the thread stack size to its max in this environment --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 3b89a65e34..2f4302f86b 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -14,7 +14,7 @@ on: env: JAVA_VERSION: '11' JAVA_DISTRIBUTION: 'zulu' #This is the default on v1 of the action for 1.8 - MAVEN_OPTS: "-Djansi.force=true -Dhttps.protocols=TLSv1.2 -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=WARN -Djava.awt.headless=true" + MAVEN_OPTS: "-Djansi.force=true -Dhttps.protocols=TLSv1.2 -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=WARN -Djava.awt.headless=true -XX:ThreadStackSize=1m" jobs: # Runs the pom sorter and code formatter to ensure that the code