getColumnVisibilities() {
+ return Collections.unmodifiableSet(columnVisibilities);
+ }
+
+ /**
+ * Return the total number of times a field was seen.
+ *
+ * @return the total count
+ */
+ @Override
+ public Long getAggregation() {
+ return count;
+ }
+
+ @Override
+ public boolean hasAggregation() {
+ return count > 0L;
+ }
+
+ /**
+ * Increments the current count by 1.
+ *
+ * @param value
+ * the value to aggregate
+ */
+ @Override
+ public void aggregate(Attribute> value) {
+ count++;
+ this.columnVisibilities.add(value.getColumnVisibility());
+ }
+
+ @Override
+ public void merge(Aggregator> other) {
+ if (other instanceof CountAggregator) {
+ CountAggregator aggregator = (CountAggregator) other;
+ this.count += aggregator.count;
+ this.columnVisibilities.addAll(aggregator.columnVisibilities);
+ } else {
+ throw new IllegalArgumentException("Cannot merge instance of " + other.getClass().getName());
+ }
+ }
+
+ @Override
+ public String toString() {
+ return new ToStringBuilder(this).append("field", field).append("count", count).append("columnVisibilities", columnVisibilities).toString();
+ }
+}
diff --git a/warehouse/query-core/src/main/java/datawave/query/common/grouping/DocumentGrouper.java b/warehouse/query-core/src/main/java/datawave/query/common/grouping/DocumentGrouper.java
new file mode 100644
index 0000000000..af98c779c7
--- /dev/null
+++ b/warehouse/query-core/src/main/java/datawave/query/common/grouping/DocumentGrouper.java
@@ -0,0 +1,717 @@
+package datawave.query.common.grouping;
+
+import static org.slf4j.LoggerFactory.getLogger;
+
+import java.math.BigDecimal;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedSet;
+import java.util.TreeSet;
+
+import org.apache.accumulo.core.data.Key;
+import org.javatuples.Pair;
+import org.slf4j.Logger;
+
+import com.google.common.collect.ArrayListMultimap;
+import com.google.common.collect.HashMultimap;
+import com.google.common.collect.Multimap;
+import com.google.common.collect.Sets;
+
+import datawave.data.type.Type;
+import datawave.query.attributes.Attribute;
+import datawave.query.attributes.Document;
+import datawave.query.attributes.TypeAttribute;
+
+/**
+ * This class provides the primary functionality needed to group documents and aggregate field values within identified groups (regardless if done server or
+ * client-side).
+ *
+ *
+ * Grouping
+ *
+ * Grouping fields across documents will result in groupings of distinct value groupings for each specified field to group, as well as the total number of times
+ * each particular grouping combination was seen. Fields to group by can be specified by the following options:
+ *
+ * - The LUCENE function {@code #GROUPBY()}.
+ * - The JEXL function {@code f:groupby()}.
+ * - The query parameter {@code group.fields}.
+ *
+ * Groupings may be of any size that encompass none, some, or all of the target group fields. If a document has no entries for any of the target group fields,
+ * it will be grouped as part of an 'empty' grouping, and all target aggregation entries will be aggregated to the empty grouping. The count for 'empty' groups
+ * will be the same as the number of documents seen without any group-by fields. Values are grouped together based on the format of each document entry's key,
+ * which may have one of the following formats:
+ *
+ * - {@code <FIELD>}
+ * - {@code <FIELD>.<INSTANCE>}
+ * - {@code <FIELD>.<GROUPING_CONTEXT>. ... .<INSTANCE>}
+ *
+ * Values of fields with the same context and instance are considered direct one-to-one grouping matches, and will be placed within the same groupings. Direct
+ * matches cannot be determined for values of fields that do not have a context, and as such they will be combined with each possible grouping, effectively a
+ * cartesian product. Direct matches are prioritized and found first before indirect matches are combined with them.
+ *
+ *
+ * Aggregation
+ *
+ * Once all valid groupings have been identified and counted, aggregation can be performed on the values of any specified fields for each grouping. The
+ * aggregation fields can differ from the group-by fields. The following aggregation operations are supported:
+ *
+ *
+ * SUM: Sum up all the values for specified fields across groupings. This operation is limited to fields with numerical values. Fields may be
+ * specified via:
+ *
+ * - The LUCENE function {@code #SUM()}.
+ * - The JEXL function {@code f:sum()}.
+ * - The query parameter {@code sum.fields}.
+ *
+ * MAX: Find the max values for specified fields across groupings. Fields may be specified via:
+ *
+ * - The LUCENE function {@code #MAX()}.
+ * - The JEXL function {@code f:max()}.
+ * - The query parameter {@code max.fields}.
+ *
+ * MIN: Find the min values for specified fields across groupings. Fields may be specified via:
+ *
+ * - The LUCENE function {@code #MIN()}.
+ * - The JEXL function {@code f:min()}.
+ * - The query parameter {@code min.fields}.
+ *
+ * COUNT: Count the number of times values were seen for specified fields across groupings. Fields may be specified via:
+ *
+ * - The LUCENE function {@code #COUNT()}.
+ * - The JEXL function {@code f:count()}.
+ * - The query parameter {@code count.fields}.
+ *
+ * AVERAGE: Find the average of all values for specified fields across groupings. This operation is limited to fields with numerical values.
+ * Fields may be specified via:
+ *
+ * - The LUCENE function {@code #AVERAGE()}.
+ * - The JEXL function {@code f:average()}.
+ * - The query parameter {@code average.fields}.
+ *
+ */
+public class DocumentGrouper {
+
+ private static final Logger log = getLogger(DocumentGrouper.class);
+
+ public static final String GROUP_COUNT = "COUNT";
+ public static final String FIELD_SUM_SUFFIX = "_SUM";
+ public static final String FIELD_MAX_SUFFIX = "_MAX";
+ public static final String FIELD_MIN_SUFFIX = "_MIN";
+ public static final String FIELD_AVERAGE_NUMERATOR_SUFFIX = "_AVERAGE_NUMERATOR";
+ public static final String FIELD_AVERAGE_DIVISOR_SUFFIX = "_AVERAGE_DIVISOR";
+ public static final String FIELD_AVERAGE_SUFFIX = "_AVERAGE";
+ public static final String FIELD_COUNT_SUFFIX = "_COUNT";
+
+ /**
+ * Groups and aggregates fields from the entries in the given document and merges the new group information into the given {@link Groups} instance.
+ *
+ * @param entry
+ * the document entry
+ * @param groupFields
+ * the fields to group and aggregate
+ * @param groups
+ * the {@link Groups} instance to merge newly found groups into
+ */
+ public static void group(Map.Entry entry, GroupFields groupFields, Groups groups) {
+ DocumentGrouper documentGrouper = new DocumentGrouper(entry, groupFields, groups);
+ documentGrouper.group();
+ }
+
+ private final Key documentKey;
+ private final Document document;
+ private final Set groupFields;
+ private final Map reverseModelMappings;
+ private final FieldAggregator.Factory fieldAggregatorFactory;
+
+ private final Groups groups;
+ private final Groups currentGroups = new Groups();
+ private final FieldIndex groupFieldsIndex = new FieldIndex(false);
+ private final FieldIndex aggregateFieldsIndex = new FieldIndex(true);
+ private final Multimap,Grouping> groupingContextAndInstancesSeenForGroups = HashMultimap.create();
+ private final int maxGroupSize;
+
+ private DocumentGrouper(Map.Entry documentEntry, GroupFields groupFields, Groups groups) {
+ this.documentKey = documentEntry.getKey();
+ this.document = documentEntry.getValue();
+ this.groupFields = groupFields.getGroupByFields();
+ this.fieldAggregatorFactory = groupFields.getFieldAggregatorFactory();
+ this.reverseModelMappings = groupFields.getReverseModelMap();
+ this.groups = groups;
+ this.maxGroupSize = this.groupFields.size();
+ }
+
+ /**
+ * Identify valid groups in the given document and aggregate relevant events to those groups.
+ */
+ private void group() {
+ log.trace("apply to {} {}", documentKey, document);
+ // If the document contains entries that indicate grouping has already been performed, we are seeing a document that was generated by
+ // GroupingIterator.flatten(). No further grouping can occur. Extract the grouping information from the document and merge them into the current groups.
+ if (isDocumentAlreadyGrouped()) {
+ extractGroupsFromDocument();
+ } else { // Otherwise, the document contains entries that have not yet been grouped and counted.
+ // Index the document entries.
+ indexDocumentEntries();
+ // Group the document entries.
+ groupEntries();
+ // Aggregate fields only if there were aggregation fields specified and if any entries for aggregation were found.
+ if (fieldAggregatorFactory.hasFieldsToAggregate() && !aggregateFieldsIndex.isEmpty()) {
+ aggregateEntries();
+ }
+
+ // Merge the groups and aggregations we found in this particular group-by operation into the groups passed by the user. The separation is required
+ // to ensure that any grouping and aggregation done in this session was applied only to the current document.
+ this.groups.mergeAll(currentGroups);
+ }
+ }
+
+ /**
+ * Return whether the document contains entries representing a flattened set of group counts generated by {@link datawave.query.iterator.GroupingIterator}.
+ *
+ * @return true if the document contains flattened group counts, or false otherwise.
+ */
+ private boolean isDocumentAlreadyGrouped() {
+ return document.getDictionary().keySet().stream().anyMatch(key -> key.startsWith(GROUP_COUNT));
+ }
+
+ /**
+ * Extract grouping information from the current document and add them to the current groups. Each field will be remapped if a reverse-model mapping was
+ * supplied.
+ */
+ @SuppressWarnings("unchecked")
+ private void extractGroupsFromDocument() {
+ // Parse a field from each entry and store them in instanceToFields. The id indicates which grouping, count, and aggregated values go together.
+ Multimap idToFields = HashMultimap.create();
+ for (Map.Entry>> entry : document.entrySet()) {
+ Field field = parseField(entry);
+ idToFields.put(field.getInstance(), field);
+ }
+ // For each distinct grouping, parse and write the grouping information to the current groups.
+ for (String instance : idToFields.keySet()) {
+ // The distinct grouping.
+ Grouping grouping = new Grouping();
+ // The aggregated values.
+ FieldAggregator fieldAggregator = new FieldAggregator();
+ // The total times the grouping was seen.
+ int count = 0;
+ for (Field field : idToFields.get(instance)) {
+ // We found the group count.
+ if (field.getBase().equals(GROUP_COUNT)) {
+ TypeAttribute attribute = (TypeAttribute) field.getAttribute();
+ count = attribute.getType().getDelegate().intValue();
+ // We found the sum of an aggregated field.
+ } else if (field.getBase().endsWith(FIELD_SUM_SUFFIX)) {
+ TypeAttribute attribute = (TypeAttribute) field.getAttribute();
+ String fieldName = removeSuffix(field.getBase(), FIELD_SUM_SUFFIX);
+ fieldAggregator.mergeAggregator(SumAggregator.of(fieldName, attribute));
+ // We found the numerator of the average of an aggregated field.
+ } else if (field.getBase().endsWith(FIELD_AVERAGE_NUMERATOR_SUFFIX)) {
+ String unmappedFieldName = removeSuffix(field.getBase(), FIELD_AVERAGE_NUMERATOR_SUFFIX);
+ String fieldName = removeSuffix(field.getBase(), FIELD_AVERAGE_NUMERATOR_SUFFIX);
+ // It's possible that the divisor will be stored under a previously unmapped field name. For example, the field ETA from
+ // ETA_AVERAGE_NUMERATOR.1 could be mapped to AG here. Use the original field name (e.g. ETA) to ensure we find the
+ // corresponding divisor (e.g. ETA_AVERAGE_DIVISOR.1) for the numerator.
+ String divisorField = unmappedFieldName + FIELD_AVERAGE_DIVISOR_SUFFIX + "." + field.getInstance();
+ TypeAttribute divisorAttribute = (TypeAttribute) document.get(divisorField);
+ TypeAttribute numeratorAttribute = (TypeAttribute) field.getAttribute();
+ fieldAggregator.mergeAggregator(AverageAggregator.of(fieldName, numeratorAttribute, divisorAttribute));
+ // We found the count of an aggregated field.
+ } else if (field.getBase().endsWith(FIELD_COUNT_SUFFIX)) {
+ TypeAttribute attribute = (TypeAttribute) field.getAttribute();
+ String fieldName = removeSuffix(field.getBase(), FIELD_COUNT_SUFFIX);
+ fieldAggregator.mergeAggregator(CountAggregator.of(fieldName, attribute));
+ // We found the min of an aggregated field.
+ } else if (field.getBase().endsWith(FIELD_MIN_SUFFIX)) {
+ String fieldName = removeSuffix(field.getBase(), FIELD_MIN_SUFFIX);
+ fieldAggregator.mergeAggregator(MinAggregator.of(fieldName, field.getAttribute()));
+ // We found the max of an aggregated field.
+ } else if (field.getBase().endsWith(FIELD_MAX_SUFFIX)) {
+ String fieldName = removeSuffix(field.getBase(), FIELD_MAX_SUFFIX);
+ fieldAggregator.mergeAggregator(MaxAggregator.of(fieldName, field.getAttribute()));
+ // We found a field that is part of the grouping.
+ } else if (!field.getBase().endsWith(FIELD_AVERAGE_DIVISOR_SUFFIX)) {
+ Attribute> attribute = field.getAttribute();
+ GroupingAttribute> newAttribute = new GroupingAttribute<>((Type>) attribute.getData(), new Key(field.getBase()), true);
+ newAttribute.setColumnVisibility(attribute.getColumnVisibility());
+ grouping.add(newAttribute);
+ }
+ }
+ // Create a new group and merge it into the existing groups.
+ Group group = new Group(grouping, count);
+ group.setFieldAggregator(fieldAggregator);
+ group.addDocumentVisibility(document.getColumnVisibility());
+ groups.mergeOrPutGroup(group);
+ }
+ }
+
+ /**
+ * Return a substring of the given str without the given suffix.
+ *
+ * @param str
+ * the string
+ * @param suffix
+ * the suffix
+ * @return the string without the suffix
+ */
+ private String removeSuffix(String str, String suffix) {
+ int suffixLength = suffix.length();
+ return str.substring(0, str.length() - suffixLength);
+ }
+
+ /**
+ * Identify which events in the document are targets for grouping and/or aggregation, and index them.
+ */
+ private void indexDocumentEntries() {
+ for (Map.Entry> entry : document.entrySet()) {
+ Field field = parseField(entry);
+ // The current field is a target for grouping.
+ if (groupFields.contains(field.getBase())) {
+ groupFieldsIndex.index(field);
+ }
+ // The current field is a target for aggregation.
+ if (fieldAggregatorFactory.isFieldToAggregate(field.getBase())) {
+ aggregateFieldsIndex.index(field);
+ }
+ }
+ }
+
+ /**
+ * Identify valid groupings consisting of target group pairs and create/update their corresponding {@link Group} in {@link #currentGroups}.
+ */
+ private void groupEntries() {
+ // If we found any entries for target group fields, identify all valid groupings.
+ if (groupEntriesFound()) {
+ // The groupings combinations that we find. Each combination may only have one Field from a particular target group field, e.g. if doing
+ // #GROUP_BY(AGE,GENDER), a combination set will have at most one AGE field and one GENDER field.
+ List> groupings = new ArrayList<>();
+
+ // If we only have one target grouping field, we do not need to find any group combinations. All events for the given target group field should be
+ // tracked as individual groupings.
+ if (maxGroupSize == 1) {
+ groupFieldsIndex.fields.values().stream().map(Collections::singleton).forEach(groupings::add);
+ } else {
+ // If we have any group field events with grouping contexts and instances, e.g. GENDER.FOO.1, it's possible that we will find direct matches to
+ // other group field events with the same grouping context and instance (a direct match). These should be found first for efficiency purposes.
+ if (groupFieldsIndex.hasFieldsWithPossibleDirectMatch()) {
+ groupings = getGroupingsWithDirectMatches();
+ }
+ // If we have any group field events that do not have a grouping context and instance, e.g. GENDER.1 or GENDER, then each one of those events
+ // should
+ // be combined with each existing group combination, effectively creating cartesian products.
+ if (groupFieldsIndex.hasFieldsWithoutDirectMatch()) {
+ groupings = getGroupingsWithoutDirectMatches(groupings);
+ }
+ }
+
+ // Track each identified grouping.
+ groupings.forEach(this::trackGroup);
+ } else {
+ // If no entries were found for any of the target group fields, create a single 'empty' group that will represent this document in the final
+ // grouping results.
+ trackGroup(Grouping.emptyGrouping());
+ }
+ }
+
+ /**
+ * Identify grouping combinations that are direct matches to each other based on the grouping context and instance of the field events. If we do not find
+ * any direct match at all for a specified target group field, then all events for the group field will be combined.
+ *
+ * @return the direct match combinations
+ */
+ private List> getGroupingsWithDirectMatches() {
+ List> groupings = new ArrayList<>();
+ Set fieldsWithGroupingContextAndInstance = groupFieldsIndex.getFieldsWithPossibleDirectMatch();
+ // If we only saw one field with a grouping context and instance, return a list of singletons with each field event. We cannot create any combinations
+ // at this time.
+ if (fieldsWithGroupingContextAndInstance.size() == 1) {
+ Collection fields = groupFieldsIndex.getFields(fieldsWithGroupingContextAndInstance.iterator().next());
+ fields.stream().map(Collections::singleton).forEach(groupings::add);
+ } else {
+ // If we have more than one target field with a grouping context and instance, determine the correct groupings based off matching the grouping
+ // context and instance where possible with direct 1-to-1 matches, i.e. AGE.FOO.1 is a direct match to GENDER.FOO.1.
+ Multimap,Field> groupingContextAndInstanceToField = HashMultimap.create();
+ for (String fieldName : fieldsWithGroupingContextAndInstance) {
+ Collection fields = groupFieldsIndex.getFields(fieldName);
+ for (Field field : fields) {
+ groupingContextAndInstanceToField.put(Pair.with(field.getGroupingContext(), field.getInstance()), field);
+ }
+ }
+
+ // Sort the entries by the number of direct matches seen for each grouping context-instance pair.
+ SortedSet,Collection>> directMatchesSortedByPrevalence = new TreeSet<>(
+ Comparator.comparingInt((Map.Entry,Collection> left) -> left.getValue().size()).reversed()
+ .thenComparing(Map.Entry::getKey));
+ directMatchesSortedByPrevalence.addAll(groupingContextAndInstanceToField.asMap().entrySet());
+
+ // Map of group target field names to the grouping combinations found for them.
+ Multimap,Set> fieldsToGroupings = ArrayListMultimap.create();
+ // Tracks the largest size seen for any combination of direct matches for target group fields.
+ Map fieldToLargestGroupingSize = new HashMap<>();
+
+ for (Map.Entry,Collection> entry : directMatchesSortedByPrevalence) {
+ Collection fields = entry.getValue();
+ SortedSet groupingFields = new TreeSet<>();
+ boolean keep = false;
+ for (Field field : fields) {
+ groupingFields.add(field.getBase());
+ // If we have seen this field before associated with another grouping context and instance, only keep this grouping if it is the same size
+ // as the largest grouping we've seen for the field.
+ if (fieldToLargestGroupingSize.containsKey(field.getBase())) {
+ if (fields.size() == fieldToLargestGroupingSize.get(field.getBase())) {
+ keep = true;
+ }
+ } else {
+ // If this is the first time we are seeing this field, then we have found the largest batch size for the grouping that this field is in.
+ // Automatically keep this grouping.
+ fieldToLargestGroupingSize.put(field.getBase(), fields.size());
+ keep = true;
+ }
+ }
+ if (keep) {
+ fieldsToGroupings.put(groupingFields, Sets.newHashSet(fields));
+ }
+ }
+
+ // Now that we've found the largest direct match combinations for each target group field, we need to effectively create cartesian products between
+ // each combination. For instance, given the following grouping combinations resulting from #GROUP_BY(AGE,GENDER,RECORD_ID,RECORD_TEXT,BUILDING):
+ //
+ // {AGE,GENDER} => [{"20", "MALE"},{"10", "FEMALE"}]
+ // {RECORD_ID,RECORD_TEXT} => [{"123", "Summary"}]
+ // {BUILDING} => [{West},{East}]
+ //
+ // We want to generate the following combinations:
+ // {"20","MALE","123","Summary","West"}
+ // {"20","MALE","123","Summary","East"}
+ // {"10","FEMALE","123","Summary","West"}
+ // {"10","FEMALE","123","Summary","East"}
+ for (SortedSet fields : fieldsToGroupings.keySet()) {
+ Collection> currentGroupings = fieldsToGroupings.get(fields);
+ if (groupings.isEmpty()) {
+ groupings.addAll(currentGroupings);
+ } else {
+ List> newGroupings = new ArrayList<>();
+ for (Set oldGrouping : groupings) {
+ for (Set currentGrouping : currentGroupings) {
+ Set newGrouping = new HashSet<>(oldGrouping);
+ newGrouping.addAll(currentGrouping);
+ newGroupings.add(newGrouping);
+ }
+ }
+ groupings = newGroupings;
+ }
+ }
+ }
+ return groupings;
+ }
+
+ /**
+ * Combine each field event for target group fields that do not have both a grouping context and instance to any previously found grouping combinations.
+ *
+ * @param prevGroupings
+ * the combinations that have been found thus far
+ * @return the updated grouping combinations
+ */
+ private List> getGroupingsWithoutDirectMatches(List> prevGroupings) {
+ List> groupings = new ArrayList<>(prevGroupings);
+ for (String fieldName : groupFieldsIndex.getFieldsWithoutDirectMatch()) {
+ Collection fields = groupFieldsIndex.getFields(fieldName);
+ // If there are no previous grouping combinations, add each field event as a singular combination.
+ if (groupings.isEmpty()) {
+ for (Field field : fields) {
+ groupings.add(Sets.newHashSet(field));
+ }
+ } else {
+ // Effectively create cartesian products of each previously seen grouping combination and each field event for the current target event field.
+ // For instance, if we have the previous combination [{"20","MALE"},{"10","FEMALE"}] and the field events {"A","B","C"}, we want to generate
+ // the following combinations:
+ //
+ // {"20","MALE", "A"}
+ // {"20","MALE", "B"}
+ // {"20","MALE", "C"}
+ // {"10","FEMALE", "A"}
+ // {"10","FEMALE", "B"}
+ // {"10","FEMALE", "C"}
+ List> newGroupings = new ArrayList<>();
+ for (Set oldGrouping : groupings) {
+ for (Field field : fields) {
+ Set newGrouping = new HashSet<>(oldGrouping);
+ newGrouping.add(field);
+ newGroupings.add(newGrouping);
+ }
+ }
+ groupings = newGroupings;
+ }
+ }
+ return groupings;
+ }
+
+ /**
+ * Track the groups identified by the given field event combinations.
+ *
+ * @param groupedFields
+ * the group combination
+ */
+ private void trackGroup(Collection groupedFields) {
+ // The grouping context-instance pairs seen for all grouping keys generated in this method.
+ Set> groupingContextAndInstances = new HashSet<>();
+ // The set of 'keys' that are used to identify individual distinct groupings.
+ List groupings = new ArrayList<>();
+ // It is possible for a field event in a grouping combination to have a multi-value attribute. If this occurs, we must once again create cartesian
+ // products between all the values of the attribute of each field.
+ for (Field field : groupedFields) {
+ // Track the grouping context-instance pair. This is required for us to be able to find direct matches later when aggregating.
+ if (field.hasGroupingContext() && field.hasInstance()) {
+ groupingContextAndInstances.add(Pair.with(field.getGroupingContext(), field.getInstance()));
+ }
+ // If we have no grouping keys yet, create keys consisting of each value of the current field.
+ if (groupings.isEmpty()) {
+ for (Attribute> attribute : field.getAttributes()) {
+ GroupingAttribute> copy = createCopyWithKey(attribute, field.getBase());
+ groupings.add(new Grouping(copy));
+ }
+ } else {
+ // Otherwise, create the cartesian product between the current field's value and each existing key.
+ List newGroupings = new ArrayList<>();
+ for (Attribute> attribute : field.getAttributes()) {
+ GroupingAttribute> copy = createCopyWithKey(attribute, field.getBase());
+ for (Grouping grouping : groupings) {
+ Grouping groupingCopy = new Grouping(grouping);
+ groupingCopy.add(copy);
+ newGroupings.add(groupingCopy);
+ }
+ }
+ groupings = newGroupings;
+ }
+ }
+
+ // Track which grouping context-instance pairs we have seen for each grouping key.
+ for (Pair groupingContextAndInstance : groupingContextAndInstances) {
+ this.groupingContextAndInstancesSeenForGroups.putAll(groupingContextAndInstance, groupings);
+ }
+
+ // Now we can create/update groups in currentGroups for each grouping key.
+ groupings.forEach(this::trackGroup);
+ }
+
+ /**
+ * Create/update the group for the given grouping.
+ *
+ * @param grouping
+ * the grouping to track
+ */
+ private void trackGroup(Grouping grouping) {
+ // Get the group.
+ Group group = currentGroups.getGroup(grouping);
+ // Create a group for the grouping if one does not already exist.
+ if (group == null) {
+ group = new Group(grouping);
+ group.setFieldAggregator(fieldAggregatorFactory.newInstance());
+ currentGroups.putGroup(group);
+ }
+ // Add the visibilities of each attribute in the grouping for combination later, and increment the count for how many times this distinct
+ // grouping was seen.
+ group.addAttributeVisibilities(grouping);
+ group.incrementCount();
+ group.addDocumentVisibility(document.getColumnVisibility());
+ }
+
+ private GroupingAttribute> createCopyWithKey(Attribute> attribute, String key) {
+ Type> type = ((TypeAttribute>) attribute).getType();
+ GroupingAttribute> newAttribute = new GroupingAttribute<>(type, new Key(key), true);
+ newAttribute.setColumnVisibility(attribute.getColumnVisibility());
+ return newAttribute;
+ }
+
+ /**
+ * Aggregate all qualifying events that are from target aggregation fields.
+ */
+ private void aggregateEntries() {
+ // Groupings were found in the document. Aggregate entries according to their association based on each entry's grouping context and instance.
+ if (groupEntriesFound()) {
+ // If we have any target events for aggregation that have a grouping context and instance, e.g. AGE.FOO.1, attempt to find groups that have matching
+ // grouping context and instance pairs, and aggregate the events into those groups only. If we do not find any direct match at all for a specified
+ // aggregation field, then all events for the aggregation field will be aggregated into each group.
+ if (aggregateFieldsIndex.hasFieldsWithPossibleDirectMatch()) {
+ // Attempt to find a direct match for the current aggregation target field.
+ for (String fieldName : aggregateFieldsIndex.fieldToFieldsByGroupingContextAndInstance.keySet()) {
+ Multimap,Field> groupingContextAndInstanceToFields = aggregateFieldsIndex.fieldToFieldsByGroupingContextAndInstance
+ .get(fieldName);
+ Set> aggregatePairs = groupingContextAndInstanceToFields.keySet();
+ Set> groupPairs = this.groupingContextAndInstancesSeenForGroups.keySet();
+ // A group and an aggregation event is considered to be a direct match if and only if the group contains any event that has the same
+ // grouping context and instance as the aggregation event.
+ Set> directMatches = Sets.intersection(aggregatePairs, groupPairs);
+ // If we have any direct matches, then only aggregate the direct matches into the groups where we saw a direct match.
+ if (!directMatches.isEmpty()) {
+ for (Pair directMatch : directMatches) {
+ for (Grouping grouping : this.groupingContextAndInstancesSeenForGroups.get(directMatch)) {
+ Group group = currentGroups.getGroup(grouping);
+ Collection fields = groupingContextAndInstanceToFields.get(directMatch);
+ group.aggregateAll(fields);
+ }
+ }
+ } else {
+ // Otherwise, aggregate all events for this field into all groups.
+ Collection fields = aggregateFieldsIndex.getFields(fieldName);
+ currentGroups.aggregateToAllGroups(fields);
+ }
+ }
+ }
+ // If there are any target aggregation events that do not have a grouping context, e.g. AGE or AGE.1, then all target aggregation events should be
+ // aggregated into all groups.
+ if (aggregateFieldsIndex.hasFieldsWithoutDirectMatch()) {
+ for (String fieldName : aggregateFieldsIndex.fieldsWithoutDirectMatch) {
+ Collection fields = aggregateFieldsIndex.getFields(fieldName);
+ currentGroups.aggregateToAllGroups(fields);
+ }
+ }
+ } else {
+ // No groupings were found in the document. In this case, we will consider this document to contain a placeholder 'empty' grouping, and aggregate
+ // all aggregation entries to the empty grouping.
+ Group group = currentGroups.getGroup(Grouping.emptyGrouping());
+ // Aggregate all aggregate entries to the grouping.
+ Multimap fields = aggregateFieldsIndex.fields;
+ for (String field : fields.keySet()) {
+ group.aggregateAll(field, fields.get(field));
+ }
+ }
+ }
+
+ private boolean groupEntriesFound() {
+ return !groupFieldsIndex.isEmpty();
+ }
+
+ /**
+ * Parses the relevant information from the given entry and returns a {@link Field} that contains the field name, group, instance, and the value. It is
+ * assumed that the entry's key will have the format {@code <FIELD>}, {@code <FIELD>.<INSTANCE>} or {@code <FIELD>.<GROUPING_CONTEXT>. ... .<INSTANCE>}.
+ *
+ * @param entry
+ * the document entry
+ * @return the field entry.
+ */
+ private Field parseField(Map.Entry> entry) {
+ String field = entry.getKey();
+ String name = field;
+ String groupingContext = null;
+ String instance = null;
+
+ int firstPeriod = field.indexOf('.');
+ // If the field name contains at least one period, the field's format is either <FIELD>.<INSTANCE> or <FIELD>.<GROUPING_CONTEXT>. ... .<INSTANCE>
+ if (firstPeriod != -1) {
+ // The field name is everything before the first period.
+ name = field.substring(0, firstPeriod);
+
+ int secondPeriod = field.indexOf(".", firstPeriod + 1);
+ // If a second period is present, we know that field's format is <FIELD>.<GROUPING_CONTEXT>. ... .<INSTANCE>
+ if (secondPeriod != -1) {
+ // Parse the group from the substring directly following the name.
+ groupingContext = field.substring(firstPeriod + 1, secondPeriod);
+ // Parse the instance from the substring after the last period.
+ instance = field.substring(field.lastIndexOf(".") + 1);
+ } else {
+ // If there is no second period present, the field's format is <FIELD>.<INSTANCE>.
+ instance = field.substring(firstPeriod + 1);
+ }
+ }
+
+ // Map the field name to the root model name. This ensures that even if we're grouping fields that can be seen with different model names, e.g. AG, ETA,
+ // and AGE, that the same root name will be used across the board to ensure that they're treated as from the same target group/aggregation field.
+ name = getMappedFieldName(name);
+
+ return new Field(name, groupingContext, instance, entry.getValue());
+ }
+
+ /**
+ * Get the corresponding model mapping for the field. If model mappings have not been provided, the original field will be returned.
+ *
+ * @param field
+ * the field to map
+ * @return the mapped field
+ */
+ private String getMappedFieldName(String field) {
+ return reverseModelMappings.getOrDefault(field, field);
+ }
+
+ /**
+ * This class maintains useful indexes that will be used for determining direct and non-direct matches when grouping and aggregating.
+ */
+ private static class FieldIndex {
+
+ // Map of field names to their entries.
+ private final Multimap fields = ArrayListMultimap.create();
+ // The set of fields with possible direct matches.
+ private final Set fieldsWithPossibleDirectMatch = new HashSet<>();
+ // The set of fields with no direct matches.
+ private final Set fieldsWithoutDirectMatch = new HashSet<>();
+ // Map of field names to Multimaps of grouping contexts to entries.
+ private final Map,Field>> fieldToFieldsByGroupingContextAndInstance = new HashMap<>();
+ // Whether to accept entries that have null attributes for indexing.
+ private final boolean allowNullAttributes;
+
+ private FieldIndex(boolean allowNullAttributes) {
+ this.allowNullAttributes = allowNullAttributes;
+ }
+
+ /**
+ * Index the given {@link Field}. If {@link #allowNullAttributes} is set to false and the given field has a null attribute, it will not be indexed.
+ *
+ * @param field
+ * the field to index
+ */
+ public void index(Field field) {
+ // Check if we can index this field.
+ if (field.getAttribute() != null || allowNullAttributes) {
+ fields.put(field.getBase(), field);
+ // If the field has a grouping context and instance, it's possible that it may have a direct match. Index the field and its grouping
+ // context-instance pair.
+ if (field.hasGroupingContext() && field.hasInstance()) {
+ fieldsWithPossibleDirectMatch.add(field.getBase());
+ Multimap,Field> groupingContextAndInstanceToField = fieldToFieldsByGroupingContextAndInstance.get(field.getBase());
+ if (groupingContextAndInstanceToField == null) {
+ groupingContextAndInstanceToField = HashMultimap.create();
+ fieldToFieldsByGroupingContextAndInstance.put(field.getBase(), groupingContextAndInstanceToField);
+ }
+ groupingContextAndInstanceToField.put(Pair.with(field.getGroupingContext(), field.getInstance()), field);
+ } else {
+ // Otherwise, the field will have no direct matches.
+ fieldsWithoutDirectMatch.add(field.getBase());
+ }
+ }
+ }
+
+ public Multimap getFields() {
+ return fields;
+ }
+
+ public Collection getFields(String field) {
+ return fields.get(field);
+ }
+
+ public Set getFieldsWithPossibleDirectMatch() {
+ return fieldsWithPossibleDirectMatch;
+ }
+
+ public boolean hasFieldsWithPossibleDirectMatch() {
+ return !fieldsWithPossibleDirectMatch.isEmpty();
+ }
+
+ public boolean hasFieldsWithoutDirectMatch() {
+ return !fieldsWithoutDirectMatch.isEmpty();
+ }
+
+ public Set getFieldsWithoutDirectMatch() {
+ return fieldsWithoutDirectMatch;
+ }
+
+ public boolean isEmpty() {
+ return fields.isEmpty();
+ }
+ }
+}
diff --git a/warehouse/query-core/src/main/java/datawave/query/common/grouping/Field.java b/warehouse/query-core/src/main/java/datawave/query/common/grouping/Field.java
new file mode 100644
index 0000000000..7af6af2c16
--- /dev/null
+++ b/warehouse/query-core/src/main/java/datawave/query/common/grouping/Field.java
@@ -0,0 +1,124 @@
+package datawave.query.common.grouping;
+
+import java.util.Collections;
+import java.util.Objects;
+import java.util.Set;
+
+import org.apache.commons.lang.builder.ToStringBuilder;
+
+import datawave.query.attributes.Attribute;
+import datawave.query.attributes.Attributes;
+
+/**
+ * Represents an entry from a document with a field name broken down into its name, group, and instance, and the entry's attribute.
+ */
+class Field {
+
+ private final String base;
+ private final String groupingContext;
+ private final String instance;
+ private final Attribute> attribute;
+ private final Set> attributes;
+
+ public Field(String base, String groupingContext, String instance, Attribute> attribute) {
+ this.base = base;
+ this.groupingContext = groupingContext;
+ this.instance = instance;
+ this.attribute = attribute;
+
+ if (attribute instanceof Attributes) {
+ this.attributes = ((Attributes) attribute).getAttributes();
+ } else {
+ this.attributes = Collections.singleton(attribute);
+ }
+ }
+
+ /**
+ * Return the field base.
+ *
+ * @return the field base
+ */
+ public String getBase() {
+ return base;
+ }
+
+ /**
+ * Return whether this field has a grouping context as part of its name.
+ *
+ * @return true if this field has a group, or false otherwise
+ */
+ public boolean hasGroupingContext() {
+ return groupingContext != null;
+ }
+
+ /**
+ * Return the field's group, or null if the field does not have a group.
+ *
+ * @return the group
+ */
+ public String getGroupingContext() {
+ return groupingContext;
+ }
+
+ /**
+ * Return the field's instance, or null if the field does not have an instance.
+ *
+ * @return the instance
+ */
+ public String getInstance() {
+ return instance;
+ }
+
+ /**
+ * Return whether this field has an instance as part of its name.
+ *
+ * @return true if this field has an instance, or false otherwise
+ */
+ public boolean hasInstance() {
+ return instance != null;
+ }
+
+ /**
+ * Return this field's attribute
+ *
+ * @return the attribute
+ */
+ public Attribute> getAttribute() {
+ return attribute;
+ }
+
+ /**
+ * A convenience method for retrieving all attributes for this {@link Field}, particularly useful when dealing with a {@link Field} that was created with a
+ * multi-value attribute. If the originating attribute was not multi-value, then the set will consist only of the same attribute returned by
+ * {@link #getAttribute()}.
+ *
+ * @return all attributes, or same attribute as returned by {@link #getAttribute()} if the originating attribute was not multi-value
+ */
+ public Set> getAttributes() {
+ return attributes;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
+ Field field = (Field) o;
+ return Objects.equals(base, field.base) && Objects.equals(groupingContext, field.groupingContext) && Objects.equals(instance, field.instance)
+ && Objects.equals(attributes, field.attributes);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(base, groupingContext, instance, attributes);
+ }
+
+ @Override
+ public String toString() {
+ return new ToStringBuilder(this).append("base", base).append("groupingContext", groupingContext).append("instance", instance)
+ .append("attributes", attributes).toString();
+ }
+}
diff --git a/warehouse/query-core/src/main/java/datawave/query/common/grouping/FieldAggregator.java b/warehouse/query-core/src/main/java/datawave/query/common/grouping/FieldAggregator.java
new file mode 100644
index 0000000000..304dda525b
--- /dev/null
+++ b/warehouse/query-core/src/main/java/datawave/query/common/grouping/FieldAggregator.java
@@ -0,0 +1,404 @@
+package datawave.query.common.grouping;
+
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Set;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+
+import org.apache.commons.lang.builder.ToStringBuilder;
+
+import datawave.query.attributes.Attribute;
+
+/**
+ * This class provides functionality to aggregate values for specified target fields using specified aggregation operations.
+ */
+public class FieldAggregator {
+
+ private final Map>> aggregatorMap;
+
+ public FieldAggregator() {
+ aggregatorMap = new HashMap<>();
+ }
+
+ public FieldAggregator(Set sumFields, Set maxFields, Set minFields, Set countFields, Set averageFields) {
+ this();
+ populateAggregators(sumFields, SumAggregator::new);
+ populateAggregators(maxFields, MaxAggregator::new);
+ populateAggregators(minFields, MinAggregator::new);
+ populateAggregators(countFields, CountAggregator::new);
+ populateAggregators(averageFields, AverageAggregator::new);
+ }
+
+ /**
+ * Add an aggregator supplied by the given constructor for each of the given fields to the aggregator map.
+ *
+ * @param fields
+ * the fields
+ * @param constructor
+ * the aggregator constructor
+ */
+ private void populateAggregators(Set fields, Function> constructor) {
+ if (fields != null) {
+ for (String field : fields) {
+ Aggregator> aggregator = constructor.apply(field);
+ Map> map = aggregatorMap.get(field);
+ if (map == null) {
+ map = new HashMap<>();
+ this.aggregatorMap.put(field, map);
+ }
+ map.put(aggregator.getOperation(), aggregator);
+ }
+ }
+ }
+
+ /**
+ * Aggregate the given field to all relevant aggregators.
+ *
+ * @param field
+ * the field to aggregate
+ */
+ public void aggregate(Field field) {
+ if (aggregatorMap.containsKey(field.getBase())) {
+ Collection> aggregators = this.aggregatorMap.get(field.getBase()).values();
+ for (Attribute> attribute : field.getAttributes()) {
+ aggregators.forEach(aggregator -> aggregator.aggregate(attribute));
+ }
+ }
+ }
+
+ /**
+ * Aggregate each of the given fields to all relevant aggregators.
+ *
+ * @param fields
+ * the fields to aggregate
+ */
+ public void aggregateAll(Collection fields) {
+ fields.forEach(this::aggregate);
+ }
+
+ /**
+ * Aggregate each of the given fields to all relevant aggregators for the given field. This is more efficient than {@link #aggregateAll(Collection)} when
+ * you have a collection of fields for the same base field.
+ *
+ * @param field
+ * the field the base field name
+ * @param fields
+ * the fields to aggregate
+ */
+ public void aggregateAll(String field, Collection fields) {
+ if (aggregatorMap.containsKey(field)) {
+ List> attributes = fields.stream().map(Field::getAttribute).collect(Collectors.toList());
+ Collection> aggregators = this.aggregatorMap.get(field).values();
+ for (Aggregator> aggregator : aggregators) {
+ aggregator.aggregateAll(attributes);
+ }
+ }
+ }
+
+ /**
+ * Return the map of fields to their aggregators.
+ *
+ * @return the aggregator map.
+ */
+ public Map>> getAggregatorMap() {
+ return aggregatorMap;
+ }
+
+ public Aggregator> getAggregator(String field, AggregateOperation operation) {
+ Map> map = aggregatorMap.get(field);
+ if (map != null) {
+ return map.get(operation);
+ }
+ return null;
+ }
+
+ /**
+ * Return the set of all fields being aggregated.
+ *
+ * @return the fields
+ */
+ public Collection getFieldsToAggregate() {
+ return aggregatorMap.keySet();
+ }
+
+ /**
+ * Merge the given aggregator into this aggregated fields.
+ *
+ * @param aggregator
+ * the aggregator to merge.
+ */
+ public void mergeAggregator(Aggregator> aggregator) {
+ if (aggregator.hasAggregation()) {
+ Map> map = aggregatorMap.computeIfAbsent(aggregator.getField(), k -> new HashMap<>());
+ if (map.containsKey(aggregator.getOperation())) {
+ Aggregator> currentAggregator = map.get(aggregator.getOperation());
+ if (currentAggregator.hasAggregation()) {
+ currentAggregator.merge(aggregator);
+ } else {
+ map.put(aggregator.getOperation(), aggregator);
+ }
+ } else {
+ map.put(aggregator.getOperation(), aggregator);
+ }
+ }
+
+ }
+
+ /**
+ * Merge the given aggregated fields into this aggregated fields.
+ *
+ * @param other
+ * the aggregated fields to merge in
+ */
+ public void merge(FieldAggregator other) {
+ for (String field : other.aggregatorMap.keySet()) {
+ // If we already have aggregators for this field, merge the aggregators for the current field from the other aggregated fields into this one.
+ if (this.aggregatorMap.containsKey(field)) {
+ Map> thisMap = this.aggregatorMap.get(field);
+ Map> otherMap = other.aggregatorMap.get(field);
+ for (AggregateOperation operation : otherMap.keySet()) {
+ if (thisMap.containsKey(operation)) {
+ Aggregator> currentAggregator = thisMap.get(operation);
+ Aggregator> otherAggregator = otherMap.get(operation);
+ if (currentAggregator.hasAggregation() && otherAggregator.hasAggregation()) {
+ currentAggregator.merge(otherAggregator);
+ } else if (otherAggregator.hasAggregation()) {
+ thisMap.put(operation, otherAggregator);
+ }
+ } else {
+ thisMap.put(operation, otherMap.get(operation));
+ }
+ }
+ } else {
+ // If no aggregators exist in this aggregated fields for the current field, add all aggregators for it.
+ this.aggregatorMap.put(field, new HashMap<>(other.aggregatorMap.get(field)));
+ }
+ }
+ }
+
+ @Override
+ public String toString() {
+ return aggregatorMap.toString();
+ }
+
+ /**
+ * A factory that will generate new {@link FieldAggregator} with the designated sum, max, min, count, and average aggregation field targets.
+ */
+ public static class Factory {
+
+ private final Set sumFields;
+ private final Set maxFields;
+ private final Set minFields;
+ private final Set countFields;
+ private final Set averageFields;
+ private final Set allFields;
+
+ public Factory() {
+ this.sumFields = new HashSet<>();
+ this.maxFields = new HashSet<>();
+ this.minFields = new HashSet<>();
+ this.countFields = new HashSet<>();
+ this.averageFields = new HashSet<>();
+ this.allFields = new HashSet<>();
+ }
+
+ /**
+ * Set the fields for which to find the aggregated sum.
+ *
+ * @param fields
+ * the fields
+ * @return this factory
+ */
+ public Factory withSumFields(Set fields) {
+ addFields(this.sumFields, fields);
+ return this;
+ }
+
+ /**
+ * Set the fields for which to find the aggregated sum.
+ *
+ * @param fields
+ * the fields
+ * @return this factory
+ */
+ public Factory withSumFields(String... fields) {
+ addFields(this.sumFields, fields);
+ return this;
+ }
+
+ /**
+ * Set the fields for which to find the aggregated max.
+ *
+ * @param fields
+ * the fields
+ * @return this factory
+ */
+ public Factory withMaxFields(Set fields) {
+ addFields(this.maxFields, fields);
+ return this;
+ }
+
+ /**
+ * Set the fields for which to find the aggregated max.
+ *
+ * @param fields
+ * the fields
+ * @return this factory
+ */
+ public Factory withMaxFields(String... fields) {
+ addFields(this.maxFields, fields);
+ return this;
+ }
+
+ /**
+ * Set the fields for which to find the aggregated min.
+ *
+ * @param fields
+ * the fields
+ * @return this factory
+ */
+ public Factory withMinFields(Set fields) {
+ addFields(this.minFields, fields);
+ return this;
+ }
+
+ /**
+ * Set the fields for which to find the aggregated min.
+ *
+ * @param fields
+ * the fields
+ * @return this factory
+ */
+ public Factory withMinFields(String... fields) {
+ addFields(this.minFields, fields);
+ return this;
+ }
+
+ /**
+ * Set the fields for which to find the total number of times seen.
+ *
+ * @param fields
+ * the fields
+ * @return this factory
+ */
+ public Factory withCountFields(Set fields) {
+ addFields(this.countFields, fields);
+ return this;
+ }
+
+ /**
+ * Set the fields for which to find the aggregated count.
+ *
+ * @param fields
+ * the fields
+ * @return this factory
+ */
+ public Factory withCountFields(String... fields) {
+ addFields(this.countFields, fields);
+ return this;
+ }
+
+ /**
+ * Set the fields for which to find the aggregated average.
+ *
+ * @param fields
+ * the fields
+ * @return this factory
+ */
+ public Factory withAverageFields(Set fields) {
+ addFields(this.averageFields, fields);
+ return this;
+ }
+
+ /**
+ * Set the fields for which to find the aggregated average.
+ *
+ * @param fields
+ * the fields
+ * @return this factory
+ */
+ public Factory withAverageFields(String... fields) {
+ addFields(this.averageFields, fields);
+ return this;
+ }
+
+ /**
+ * Add the given fields into the given set.
+ *
+ * @param set
+ * the set to add the fields to
+ * @param fields
+ * the fields to add
+ */
+ private void addFields(Set set, Collection fields) {
+ if (fields != null) {
+ set.addAll(fields);
+ allFields.addAll(fields);
+ }
+ }
+
+ private void addFields(Set set, String... fields) {
+ addFields(set, Arrays.asList(fields));
+ }
+
+ /**
+ * Return a new {@link FieldAggregator} with the configured target aggregation fields.
+ *
+ * @return a new {@link FieldAggregator} instance
+ */
+ public FieldAggregator newInstance() {
+ return hasFieldsToAggregate() ? new FieldAggregator(sumFields, maxFields, minFields, countFields, averageFields) : new FieldAggregator();
+ }
+
+ /**
+ * Return whether this factory has any target aggregation fields set.
+ *
+ * @return true if this factory has any target aggregation fields, or false otherwise
+ */
+ public boolean hasFieldsToAggregate() {
+ return !allFields.isEmpty();
+ }
+
+ /**
+ * Return whether the given field matches a target aggregation field in this factory.
+ *
+ * @param field
+ * the field
+ * @return true if the given field is a target for aggregation, or false otherwise
+ */
+ public boolean isFieldToAggregate(String field) {
+ return allFields.contains(field);
+ }
+
+ @Override
+ public String toString() {
+ return new ToStringBuilder(this).append("sumFields", sumFields).append("maxFields", maxFields).append("minFields", minFields)
+ .append("countFields", countFields).append("averageFields", averageFields).append("allFields", allFields).toString();
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
+ Factory factory = (Factory) o;
+ return Objects.equals(sumFields, factory.sumFields) && Objects.equals(maxFields, factory.maxFields) && Objects.equals(minFields, factory.minFields)
+ && Objects.equals(countFields, factory.countFields) && Objects.equals(averageFields, factory.averageFields)
+ && Objects.equals(allFields, factory.allFields);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(sumFields, maxFields, minFields, countFields, averageFields, allFields);
+ }
+ }
+}
diff --git a/warehouse/query-core/src/main/java/datawave/query/common/grouping/Group.java b/warehouse/query-core/src/main/java/datawave/query/common/grouping/Group.java
new file mode 100644
index 0000000000..dc40eee9bd
--- /dev/null
+++ b/warehouse/query-core/src/main/java/datawave/query/common/grouping/Group.java
@@ -0,0 +1,168 @@
+package datawave.query.common.grouping;
+
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.accumulo.core.security.ColumnVisibility;
+import org.apache.commons.lang.builder.ToStringBuilder;
+
+import com.google.common.collect.HashMultimap;
+import com.google.common.collect.Multimap;
+
+/**
+ * Represents a grouping of values for fields specified via the #GROUP_BY functionality, with information about the total number of times the grouping was seen,
+ * values for target aggregation fields that were matched to this group, and the different column visibilities seen.
+ */
+public class Group {
+
+ /**
+ * The distinct set of values that represent this grouping.
+ */
+ private final ImmutableGrouping grouping;
+
+ /**
+ * The different column visibilities seen for each attribute that makes up the grouping.
+ */
+ private final Multimap,ColumnVisibility> attributeVisibilities = HashMultimap.create();
+
+ /**
+ * The column visibilities for each document that contributed entries to this grouping.
+ */
+ private final Set documentVisibilities = new HashSet<>();
+
+ /**
+ * The total number of times the distinct grouping was seen.
+ */
+ private int count;
+
+ /**
+ * The aggregated values for any specified fields to aggregate.
+ */
+ private FieldAggregator fieldAggregator = new FieldAggregator();
+
+ public Group(Grouping grouping) {
+ this(grouping, 0);
+ }
+
+ public Group(Grouping grouping, int count) {
+ this.grouping = new ImmutableGrouping(grouping);
+ addAttributeVisibilities(this.grouping);
+ this.count = count;
+ }
+
+ /**
+ * Returns the distinct set of values that represent this grouping.
+ *
+ * @return the grouping
+ */
+ public Grouping getGrouping() {
+ return grouping;
+ }
+
+ /**
+ * Add the column visibilities from each of the given attributes to the set of attribute visibilities for this group.
+ *
+ * @param grouping
+ * the attributes to add visibilities from
+ */
+ public void addAttributeVisibilities(Grouping grouping) {
+ for (GroupingAttribute> attribute : grouping) {
+ attributeVisibilities.put(attribute, attribute.getColumnVisibility());
+ }
+ }
+
+ /**
+ * Return the set of column visibilities seen for the given attribute.
+ *
+ * @param attribute
+ * the attribute
+ * @return the column visibilities seen for the given attributes
+ */
+ public Collection getVisibilitiesForAttribute(GroupingAttribute> attribute) {
+ return attributeVisibilities.get(attribute);
+ }
+
+ /**
+ * Add the column visibility to the set of visibilities of documents for which we have seen the grouping of this group in.
+ *
+ * @param columnVisibility
+ * the visibility to add
+ */
+ public void addDocumentVisibility(ColumnVisibility columnVisibility) {
+ this.documentVisibilities.add(columnVisibility);
+ }
+
+ /**
+ * Return the set of all distinct column visibilities from documents that we have seen this group in.
+ *
+ * @return the document column visibilities
+ */
+ public Set getDocumentVisibilities() {
+ return documentVisibilities;
+ }
+
+ /**
+ * Increment the number of times we have seen this grouping by one.
+ */
+ public void incrementCount() {
+ this.count++;
+ }
+
+ /**
+ * Returns the number of times we have seen this grouping.
+ *
+ * @return the number of times we've seen this group.
+ */
+ public int getCount() {
+ return count;
+ }
+
+ /**
+ * Returns the aggregated fields for this group.
+ *
+ * @return the aggregated fields.
+ */
+ public FieldAggregator getFieldAggregator() {
+ return fieldAggregator;
+ }
+
+ /**
+ * Set the aggregated fields for this group.
+ *
+ * @param fieldAggregator
+ * the aggregated fields to set
+ */
+ public void setFieldAggregator(FieldAggregator fieldAggregator) {
+ this.fieldAggregator = fieldAggregator;
+ }
+
+ public void aggregateAll(Collection fields) {
+ fieldAggregator.aggregateAll(fields);
+ }
+
+ public void aggregateAll(String field, Collection fields) {
+ fieldAggregator.aggregateAll(field, fields);
+ }
+
+ /**
+ * Merge the given group into this group. The attribute visibilities and document visibilities from the other group will be added into this group. The count
+ * for this group will be incremented by the count of the other group. The aggregated fields of the other group will be merged into the aggregated fields of
+ * this group.
+ *
+ * @param other
+ * the group to merge
+ */
+ public void merge(Group other) {
+ this.attributeVisibilities.putAll(other.attributeVisibilities);
+ this.documentVisibilities.addAll(other.documentVisibilities);
+ this.count += other.count;
+ this.fieldAggregator.merge(other.fieldAggregator);
+ }
+
+ @Override
+ public String toString() {
+ return new ToStringBuilder(this).append("attributes", grouping).append("attributeVisibilities", attributeVisibilities)
+ .append("documentVisibilities", documentVisibilities).append("count", count).append("aggregatedFields", fieldAggregator).toString();
+ }
+}
diff --git a/warehouse/query-core/src/main/java/datawave/query/common/grouping/GroupFields.java b/warehouse/query-core/src/main/java/datawave/query/common/grouping/GroupFields.java
new file mode 100644
index 0000000000..196db28e23
--- /dev/null
+++ b/warehouse/query-core/src/main/java/datawave/query/common/grouping/GroupFields.java
@@ -0,0 +1,466 @@
+package datawave.query.common.grouping;
+
+import java.io.Serializable;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+import org.apache.commons.lang.StringUtils;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonIgnore;
+import com.fasterxml.jackson.annotation.JsonValue;
+import com.google.common.collect.HashMultimap;
+import com.google.common.collect.Maps;
+import com.google.common.collect.Multimap;
+import com.google.common.collect.Sets;
+
+import datawave.query.Constants;
+import datawave.query.jexl.JexlASTHelper;
+
+/**
+ * Represents a set of fields that have been specified within a {@code #groupby()} function, as well as any fields specified in the functions {@code #sum()},
+ * {@code #count()}, {@code #average()}, {@code #min()}, and {@code #max()} that should be used when preforming a group-by operation on documents. This class
+ * can easily be captured as a parameter string using {@link GroupFields#toString()}, and transformed back into a {@link GroupFields} instance via
+ * {@link GroupFields#from(String)}.
+ */
+public class GroupFields implements Serializable {
+
+ private static final String GROUP = "GROUP";
+ private static final String SUM = "SUM";
+ private static final String COUNT = "COUNT";
+ private static final String AVERAGE = "AVERAGE";
+ private static final String MIN = "MIN";
+ private static final String MAX = "MAX";
+ private static final String MODEL_MAP = "REVERSE_MODEL_MAP";
+
+ private Set groupByFields = new HashSet<>();
+ private Set sumFields = new HashSet<>();
+ private Set countFields = new HashSet<>();
+ private Set averageFields = new HashSet<>();
+ private Set minFields = new HashSet<>();
+ private Set maxFields = new HashSet<>();
+ private Map reverseModelMap = new HashMap<>();
+
+ /**
+ * Returns a new {@link GroupFields} parsed the given string. The string is expected to have the format returned by {@link GroupFields#toString()}, but may
+ * also be a comma-delimited string of fields to group-by to support backwards-compatibility with the legacy format. See below for certain edge cases:
+ *
+ * - Given null, null will be returned.
+ * - Given an empty or blank string, an empty {@link GroupFields} will be returned.
+ * - Given a comma-delimited list of fields, e.g {@code AGE,GENDER}, a {@link GroupFields} with the fields set as the group-by fields will be
+ * returned.
+ *
+ *
+ * @param string
+ * the string to parse
+ * @return the parsed {@link GroupFields}
+ */
+ @JsonCreator
+ public static GroupFields from(String string) {
+ if (string == null) {
+ return null;
+ }
+
+ // Strip whitespaces.
+ string = StringUtils.deleteWhitespace(string);
+
+ GroupFields groupFields = new GroupFields();
+ if (!string.isEmpty()) {
+ // The string contains group fields in the latest formatting GROUP(field,...)...
+ if (string.contains(Constants.LEFT_PAREN)) {
+ // Individual elements are separated by a pipe.
+ String[] elements = StringUtils.split(string, Constants.PIPE);
+
+ // Each element starts NAME().
+ for (String element : elements) {
+ int leftParen = element.indexOf(Constants.LEFT_PAREN);
+ int rightParen = element.length() - 1;
+ String name = element.substring(0, leftParen);
+ String elementContents = element.substring(leftParen + 1, rightParen);
+ switch (name) {
+ case GROUP:
+ groupFields.groupByFields = parseSet(elementContents);
+ break;
+ case SUM:
+ groupFields.sumFields = parseSet(elementContents);
+ break;
+ case COUNT:
+ groupFields.countFields = parseSet(elementContents);
+ break;
+ case AVERAGE:
+ groupFields.averageFields = parseSet(elementContents);
+ break;
+ case MIN:
+ groupFields.minFields = parseSet(elementContents);
+ break;
+ case MAX:
+ groupFields.maxFields = parseSet(elementContents);
+ break;
+ case MODEL_MAP:
+ groupFields.reverseModelMap = parseMap(elementContents);
+ break;
+ default:
+ throw new IllegalArgumentException("Invalid element " + name);
+ }
+ }
+ } else {
+ // Otherwise, the string may be in the legacy format of a comma-delimited string with group-fields only.
+ String[] groupByFields = StringUtils.split(string, Constants.PARAM_VALUE_SEP);
+ groupFields.setGroupByFields(Sets.newHashSet(groupByFields));
+ }
+ }
+ return groupFields;
+ }
+
+ // Parse a set of fields from the string.
+ private static Set parseSet(String str) {
+ return Sets.newHashSet(StringUtils.split(str, Constants.COMMA));
+ }
+
+ // Parse a map from the given string.
+ private static Map parseMap(String str) {
+ Map map = new HashMap<>();
+ String[] entries = StringUtils.split(str, Constants.COLON);
+ for (String entry : entries) {
+ int equals = entry.indexOf(Constants.EQUALS);
+ String key = entry.substring(0, equals);
+ String value = entry.substring(equals + 1);
+ map.put(key, value);
+ }
+ return map;
+ }
+
+ /**
+ * Return a copy of the given {@link GroupFields}.
+ *
+ * @param other
+ * the other instance to copy
+ * @return the copy
+ */
+ public static GroupFields copyOf(GroupFields other) {
+ if (other == null) {
+ return null;
+ }
+
+ GroupFields copy = new GroupFields();
+ copy.groupByFields = other.groupByFields == null ? null : Sets.newHashSet(other.groupByFields);
+ copy.sumFields = other.sumFields == null ? null : Sets.newHashSet(other.sumFields);
+ copy.countFields = other.countFields == null ? null : Sets.newHashSet(other.countFields);
+ copy.averageFields = other.averageFields == null ? null : Sets.newHashSet(other.averageFields);
+ copy.minFields = other.minFields == null ? null : Sets.newHashSet(other.minFields);
+ copy.maxFields = other.maxFields == null ? null : Sets.newHashSet(other.maxFields);
+ copy.reverseModelMap = other.reverseModelMap == null ? null : Maps.newHashMap(other.reverseModelMap);
+ return copy;
+ }
+
+ /**
+ * Set the fields to group by.
+ *
+ * @param fields
+ * the fields
+ */
+ public void setGroupByFields(Set fields) {
+ this.groupByFields = fields;
+ }
+
+ /**
+ * Set the fields to sum.
+ *
+ * @param fields
+ * the fields
+ */
+ public void setSumFields(Set fields) {
+ this.sumFields = fields;
+ }
+
+ /**
+ * Set the fields to count.
+ *
+ * @param fields
+ * the fields
+ */
+ public void setCountFields(Set fields) {
+ this.countFields = fields;
+ }
+
+ /**
+ * Set the fields to average.
+ *
+ * @param fields
+ * the fields
+ */
+ public void setAverageFields(Set fields) {
+ this.averageFields = fields;
+ }
+
+ /**
+ * Set the fields to find the min of.
+ *
+ * @param fields
+ * the fields
+ */
+ public void setMinFields(Set fields) {
+ this.minFields = fields;
+ }
+
+ /**
+ * Set the fields to find the max of.
+ *
+ * @param fields
+ * the fields
+ */
+ public void setMaxFields(Set fields) {
+ this.maxFields = fields;
+ }
+
+ /**
+ * Return the fields to group by.
+ *
+ * @return the fields
+ */
+ public Set getGroupByFields() {
+ return groupByFields;
+ }
+
+ /**
+ * Return the fields to sum.
+ *
+ * @return the fields
+ */
+ public Set getSumFields() {
+ return sumFields;
+ }
+
+ /**
+ * Return the fields to count.
+ *
+ * @return the fields
+ */
+ public Set getCountFields() {
+ return countFields;
+ }
+
+ /**
+ * Return the fields to average.
+ *
+ * @return the fields
+ */
+ public Set getAverageFields() {
+ return averageFields;
+ }
+
+ /**
+ * Return the fields to find the min of.
+ *
+ * @return the fields
+ */
+ public Set getMinFields() {
+ return minFields;
+ }
+
+ /**
+ * Return the fields to find the max of.
+ *
+ * @return the fields
+ */
+ public Set getMaxFields() {
+ return maxFields;
+ }
+
+ /**
+ * Return whether this {@link GroupFields} has any fields to group by.
+ *
+ * @return true if there are fields to group by, or false otherwise
+ */
+ public boolean hasGroupByFields() {
+ return groupByFields != null && !groupByFields.isEmpty();
+ }
+
+ /**
+ * Return the set of all fields to group by, sum, count, average, and find the min and max of that must be included in projection.
+ *
+ * @return the fields required to be included in projection
+ */
+ public Set getProjectionFields() {
+ Set fields = new HashSet<>();
+ fields.addAll(this.groupByFields);
+ fields.addAll(this.sumFields);
+ fields.addAll(this.countFields);
+ fields.addAll(this.averageFields);
+ fields.addAll(this.minFields);
+ fields.addAll(this.maxFields);
+ fields.addAll(this.reverseModelMap.keySet());
+ fields.addAll(this.reverseModelMap.values());
+ return fields;
+ }
+
+ /**
+ * Deconstruct the identifiers of all fields in this {@link GroupFields}.
+ */
+ public void deconstructIdentifiers() {
+ this.groupByFields = deconstructIdentifiers(this.groupByFields);
+ this.sumFields = deconstructIdentifiers(this.sumFields);
+ this.countFields = deconstructIdentifiers(this.countFields);
+ this.averageFields = deconstructIdentifiers(this.averageFields);
+ this.minFields = deconstructIdentifiers(this.minFields);
+ this.maxFields = deconstructIdentifiers(this.maxFields);
+ }
+
+ // Return a copy of the given set with all identifiers deconstructed.
+ private Set deconstructIdentifiers(Set set) {
+ return set.stream().map(JexlASTHelper::deconstructIdentifier).collect(Collectors.toSet());
+ }
+
+ /**
+ * Modify this {@link GroupFields} to ensure that all sets of fields also include their alternative mappings, and set the model map to the given map.
+ *
+ * @param modelMap
+ * the map to retrieve alternative field mappings from
+ */
+ public void remapFields(Multimap modelMap, Map reverseModelMap) {
+ this.groupByFields = remap(this.groupByFields, modelMap);
+ this.sumFields = remap(this.sumFields, modelMap);
+ this.countFields = remap(this.countFields, modelMap);
+ this.averageFields = remap(this.averageFields, modelMap);
+ this.minFields = remap(this.minFields, modelMap);
+ this.maxFields = remap(this.maxFields, modelMap);
+
+ // Make a copy of the given reverse model map that only contains relevant mappings for efficiency.
+ Set allFields = new HashSet<>();
+ allFields.addAll(groupByFields);
+ allFields.addAll(sumFields);
+ allFields.addAll(countFields);
+ allFields.addAll(averageFields);
+ allFields.addAll(minFields);
+ allFields.addAll(maxFields);
+
+ this.reverseModelMap = new HashMap<>();
+ for (String field : allFields) {
+ if (reverseModelMap.containsKey(field)) {
+ this.reverseModelMap.put(field, reverseModelMap.get(field));
+ }
+ }
+
+ // now we can reduce the fields to only those that map to themselves wrt the reverse model map
+ this.groupByFields = reduce(this.groupByFields, this.reverseModelMap);
+ this.sumFields = reduce(this.sumFields, this.reverseModelMap);
+ this.countFields = reduce(this.countFields, this.reverseModelMap);
+ this.averageFields = reduce(this.averageFields, this.reverseModelMap);
+ this.minFields = reduce(this.minFields, this.reverseModelMap);
+ this.maxFields = reduce(this.maxFields, this.reverseModelMap);
+ }
+
+ private Set reduce(Set set, Map map) {
+ return set.stream().filter(s -> s.equals(map.getOrDefault(s, s))).collect(Collectors.toSet());
+ }
+
+ // Return a copy of the given set with all alternative field mappings included.
+ private Set remap(Set set, Multimap map) {
+ Set newMappings = new HashSet<>(set);
+ for (String field : set) {
+ field = field.toUpperCase();
+ if (map.containsKey(field)) {
+ newMappings.addAll(map.get(field));
+ }
+ }
+ return newMappings;
+ }
+
+ /**
+ * Return the model map. This map will never be null, but may be empty if this {@link GroupFields} was never remapped via
+ * {@link GroupFields#remapFields(Multimap, Map)}.
+ *
+ * @return the reverse model map
+ */
+ public Map getReverseModelMap() {
+ return reverseModelMap;
+ }
+
+ /**
+ * Return a new {@link FieldAggregator.Factory} instance configured with the aggregation fields of this {@link GroupFields}.
+ *
+ * @return a configured {@link FieldAggregator.Factory} instance
+ */
+ public FieldAggregator.Factory getFieldAggregatorFactory() {
+ return new FieldAggregator.Factory().withSumFields(sumFields).withCountFields(countFields).withAverageFields(averageFields).withMinFields(minFields)
+ .withMaxFields(maxFields);
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
+ GroupFields that = (GroupFields) o;
+ return Objects.equals(groupByFields, that.groupByFields) && Objects.equals(sumFields, that.sumFields) && Objects.equals(countFields, that.countFields)
+ && Objects.equals(averageFields, that.averageFields) && Objects.equals(minFields, that.minFields)
+ && Objects.equals(maxFields, that.maxFields) && Objects.equals(reverseModelMap, that.reverseModelMap);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(groupByFields, sumFields, countFields, averageFields, minFields, maxFields, reverseModelMap);
+ }
+
+ @JsonValue
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder();
+ writeFormattedSet(sb, GROUP, this.groupByFields);
+ writeFormattedSet(sb, SUM, this.sumFields);
+ writeFormattedSet(sb, COUNT, this.countFields);
+ writeFormattedSet(sb, AVERAGE, this.averageFields);
+ writeFormattedSet(sb, MIN, this.minFields);
+ writeFormattedSet(sb, MAX, this.maxFields);
+ writeFormattedModelMap(sb);
+ return sb.toString();
+ }
+
+ // Write the given set if not empty to the given string builder.
+ private void writeFormattedSet(StringBuilder sb, String name, Set set) {
+ if (!set.isEmpty()) {
+ if (sb.length() > 0) {
+ sb.append(Constants.PIPE);
+ }
+ sb.append(name);
+ sb.append(Constants.LEFT_PAREN);
+ Iterator iterator = set.iterator();
+ while (iterator.hasNext()) {
+ String next = iterator.next();
+ sb.append(next);
+ if (iterator.hasNext()) {
+ sb.append(Constants.COMMA);
+ }
+ }
+ sb.append(Constants.RIGHT_PAREN);
+ }
+ }
+
+ // Write the model map if not empty to the given string builder.
+ private void writeFormattedModelMap(StringBuilder sb) {
+ if (!reverseModelMap.isEmpty()) {
+ if (sb.length() > 0) {
+ sb.append(Constants.PIPE);
+ }
+ sb.append(MODEL_MAP).append(Constants.LEFT_PAREN);
+ Iterator> entryIterator = reverseModelMap.entrySet().iterator();
+ while (entryIterator.hasNext()) {
+ Map.Entry next = entryIterator.next();
+ sb.append(next.getKey()).append(Constants.EQUALS).append(next.getValue());
+ if (entryIterator.hasNext()) {
+ sb.append(Constants.COLON);
+ }
+ }
+ sb.append(Constants.RIGHT_PAREN);
+ }
+ }
+}
diff --git a/warehouse/query-core/src/main/java/datawave/query/common/grouping/Grouping.java b/warehouse/query-core/src/main/java/datawave/query/common/grouping/Grouping.java
new file mode 100644
index 0000000000..736dbb4c0d
--- /dev/null
+++ b/warehouse/query-core/src/main/java/datawave/query/common/grouping/Grouping.java
@@ -0,0 +1,118 @@
+package datawave.query.common.grouping;
+
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.function.Predicate;
+
+/**
+ * This class represents a {@link HashSet} of {@link GroupingAttribute} elements that maintains a cached hashcode that is calculated once at instantiation, and
+ * subsequently recalculated any time this set is modified. This class is used as a key within maps and as such, the cached hashcode allows us to avoid
+ * calculating the hashcode each time a search operation is performed on the keys of the maps.
+ */
+public class Grouping extends HashSet<GroupingAttribute<?>> {
+
+    // NOTE(review): this shared instance is mutable (it is a HashSet); callers must never modify it.
+    public static final Grouping EMPTY_GROUPING = new Grouping(Collections.emptySet());
+
+    /**
+     * Return the shared empty grouping. The returned instance must not be modified.
+     *
+     * @return the empty grouping
+     */
+    public static Grouping emptyGrouping() {
+        return EMPTY_GROUPING;
+    }
+
+    // The cached hashcode, refreshed after every structural modification.
+    private int cachedHashcode;
+
+    /**
+     * Return a new {@link Grouping} instance containing the elements of the given collection.
+     *
+     * @param collection
+     *            the collection
+     * @return the new grouping
+     */
+    public static Grouping of(Collection<? extends GroupingAttribute<?>> collection) {
+        return new Grouping(collection);
+    }
+
+    /**
+     * Create an empty grouping.
+     */
+    public Grouping() {
+        super();
+        updateCachedHashcode();
+    }
+
+    /**
+     * Create a grouping containing the single given attribute.
+     *
+     * @param attribute
+     *            the attribute
+     */
+    public Grouping(GroupingAttribute<?> attribute) {
+        super();
+        add(attribute);
+        updateCachedHashcode();
+    }
+
+    /**
+     * Create a grouping containing the elements of the given collection.
+     *
+     * @param collection
+     *            the collection
+     */
+    public Grouping(Collection<? extends GroupingAttribute<?>> collection) {
+        super(collection);
+        updateCachedHashcode();
+    }
+
+    @Override
+    public boolean add(GroupingAttribute<?> groupingAttribute) {
+        boolean modified = super.add(groupingAttribute);
+        if (modified) {
+            updateCachedHashcode();
+        }
+        return modified;
+    }
+
+    @Override
+    public boolean addAll(Collection<? extends GroupingAttribute<?>> collection) {
+        boolean modified = super.addAll(collection);
+        if (modified) {
+            updateCachedHashcode();
+        }
+        return modified;
+    }
+
+    @Override
+    public boolean remove(Object o) {
+        boolean modified = super.remove(o);
+        if (modified) {
+            updateCachedHashcode();
+        }
+        return modified;
+    }
+
+    @Override
+    public boolean removeAll(Collection<?> collection) {
+        boolean modified = super.removeAll(collection);
+        if (modified) {
+            updateCachedHashcode();
+        }
+        return modified;
+    }
+
+    @Override
+    public boolean removeIf(Predicate<? super GroupingAttribute<?>> filter) {
+        boolean modified = super.removeIf(filter);
+        if (modified) {
+            updateCachedHashcode();
+        }
+        return modified;
+    }
+
+    @Override
+    public void clear() {
+        super.clear();
+        updateCachedHashcode();
+    }
+
+    /**
+     * Returns the cached hashcode.
+     *
+     * @return the hashcode
+     */
+    @Override
+    public int hashCode() {
+        return cachedHashcode;
+    }
+
+    /**
+     * Update the cached hashcode based on the current elements.
+     */
+    private void updateCachedHashcode() {
+        cachedHashcode = super.hashCode();
+    }
+}
diff --git a/warehouse/query-core/src/main/java/datawave/query/common/grouping/GroupingAttribute.java b/warehouse/query-core/src/main/java/datawave/query/common/grouping/GroupingAttribute.java
new file mode 100644
index 0000000000..54db8ddf71
--- /dev/null
+++ b/warehouse/query-core/src/main/java/datawave/query/common/grouping/GroupingAttribute.java
@@ -0,0 +1,62 @@
+package datawave.query.common.grouping;
+
+import org.apache.accumulo.core.data.Key;
+import org.apache.commons.lang.builder.HashCodeBuilder;
+
+import datawave.data.type.Type;
+import datawave.query.attributes.Attribute;
+import datawave.query.attributes.TypeAttribute;
+
+/**
+ * This class serves as a wrapper for the {@link TypeAttribute} that overrides the default {@code equals()} and {@code hashCode()} behavior so that equality is
+ * determined by the attribute's field and value, and the hashCode is generated solely with the attribute's value.
+ *
+ * @param <T>
+ *            the delegate type
+ */
+public class GroupingAttribute<T extends Comparable<T>> extends TypeAttribute<T> {
+
+    public GroupingAttribute(Type<T> type, Key key, boolean toKeep) {
+        super(type, key, toKeep);
+    }
+
+    /**
+     * Returns whether the other attribute has the same field and value.
+     *
+     * @param other
+     *            the other attribute
+     * @return true if the attribute is considered equal, or false otherwise
+     */
+    @Override
+    public boolean equals(Object other) {
+        if (null == other) {
+            return false;
+        }
+        if (other instanceof TypeAttribute<?>) {
+            TypeAttribute<?> otherType = (TypeAttribute<?>) other;
+            // Equal when both the type (value) and the metadata row (field) match.
+            return this.getType().equals(otherType.getType()) && isMetadataRowEqual(otherType);
+        }
+        return false;
+    }
+
+    /**
+     * Return whether the metadata row of this attribute is considered equal to the row of the other attribute.
+     *
+     * @param other
+     *            the other attribute
+     * @return true if the metadata row is equal, or false otherwise
+     */
+    private boolean isMetadataRowEqual(Attribute<?> other) {
+        // Rows are equal when both attributes lack metadata, or both have metadata with equal rows.
+        return this.isMetadataSet() == other.isMetadataSet() && (!this.isMetadataSet() || (this.getMetadata().getRow().equals(other.getMetadata().getRow())));
+    }
+
+    /**
+     * Returns the hashcode of the attribute's value.
+     *
+     * @return the hashcode of the attribute's value
+     */
+    @Override
+    public int hashCode() {
+        // NOTE(review): hashCode intentionally ignores the metadata row while equals() does not; attributes with equal
+        // values but different rows share a hash bucket, which is legal under the hashCode contract but worth knowing.
+        return new HashCodeBuilder(2099, 2129).append(getType().getDelegateAsString()).toHashCode();
+    }
+}
diff --git a/warehouse/query-core/src/main/java/datawave/query/common/grouping/GroupingUtil.java b/warehouse/query-core/src/main/java/datawave/query/common/grouping/GroupingUtil.java
deleted file mode 100644
index 2ac3d8a461..0000000000
--- a/warehouse/query-core/src/main/java/datawave/query/common/grouping/GroupingUtil.java
+++ /dev/null
@@ -1,330 +0,0 @@
-package datawave.query.common.grouping;
-
-import static org.slf4j.LoggerFactory.getLogger;
-
-import java.math.BigDecimal;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.LinkedHashSet;
-import java.util.Map;
-import java.util.Set;
-import java.util.stream.IntStream;
-
-import org.apache.accumulo.core.data.Key;
-import org.apache.accumulo.core.security.ColumnVisibility;
-import org.apache.commons.lang.builder.HashCodeBuilder;
-import org.slf4j.Logger;
-
-import com.google.common.collect.HashMultimap;
-import com.google.common.collect.Maps;
-import com.google.common.collect.Multimap;
-import com.google.common.collect.SortedSetMultimap;
-import com.google.common.collect.TreeMultimap;
-
-import datawave.data.type.Type;
-import datawave.marking.MarkingFunctions;
-import datawave.query.attributes.Attribute;
-import datawave.query.attributes.Document;
-import datawave.query.attributes.TypeAttribute;
-
-/**
- * Provides functionality commonly needed to group documents (regardless if done server or client side).
- *
- * This class and its methods aren't static so that we don't run into concurrency issues, although all required state should be passed into the individual
- * methods and not kept in this class. Calling classes could extend this class to inherit the methods, but the state still shouldn't be inherited because not
- * all callers will be able to easily extend this class if they already/need to extend other parents.
- */
-public class GroupingUtil {
-
- private static final Logger log = getLogger(GroupingUtil.class);
-
- public ColumnVisibility combine(Collection in, MarkingFunctions markingFunctions) {
- try {
- ColumnVisibility columnVisibility = markingFunctions.combine(in);
- log.trace("combined {} into {}", in, columnVisibility);
- return columnVisibility;
- } catch (MarkingFunctions.Exception e) {
- log.warn("unable to combine visibilities from {}", in);
- }
- return new ColumnVisibility();
- }
-
- /**
- * This method mutates the countingMap argument that is passed into it. The caller may either anticipate that (and hopefully make a comment when this method
- * is called that it is expecting the countingMap to be mutated) or the caller can reset the instance of countingMap by calling getCountingMap on the
- * GroupInfo object (clearer, but relies more on garbage collection)
- *
- * @param entry
- * the map entry
- * @param groupFieldsSet
- * group fields set
- * @param countingMap
- * the counting map
- * @return grouping info
- */
- public GroupingInfo getGroupingInfo(Map.Entry entry, Set groupFieldsSet, GroupCountingHashMap countingMap) {
- return getGroupingInfo(entry, groupFieldsSet, countingMap, null);
- }
-
- public GroupingInfo getGroupingInfo(Map.Entry entry, Set groupFieldsSet, GroupCountingHashMap countingMap,
- Map reverseModelMapping) {
- log.trace("apply to {}", entry);
-
- // mapping of field name (with grouping context) to value attribute
- Map> fieldMap = Maps.newHashMap();
-
- // holds the aggregated column visibilities for each grouped event
- Multimap>,ColumnVisibility> fieldVisibilities = HashMultimap.create();
-
- if (entry != null) {
- Set expandedGroupFieldsList = new LinkedHashSet<>();
- Map>> dictionary = entry.getValue().getDictionary();
- Map countKeyMap = new HashMap<>();
- dictionary.keySet().stream().filter(key -> key.startsWith("COUNT")).filter(countKey -> entry.getValue().getDictionary().containsKey(countKey))
- .forEach(countKey -> {
- TypeAttribute> countTypeAttribute = ((TypeAttribute>) entry.getValue().getDictionary().get(countKey));
- int count = ((BigDecimal) countTypeAttribute.getType().getDelegate()).intValue();
- countKeyMap.put(countKey, count);
- });
-
- Multimap fieldToFieldWithContextMap = getFieldToFieldWithGroupingContextMap(entry.getValue(), expandedGroupFieldsList, fieldMap,
- groupFieldsSet, reverseModelMapping);
- log.trace("got a new fieldToFieldWithContextMap: {}", fieldToFieldWithContextMap);
- int longest = longestValueList(fieldToFieldWithContextMap);
- for (int i = 0; i < longest; i++) {
- Collection> fieldCollection = new HashSet<>();
- String currentGroupingContext = "";
- for (String fieldListItem : expandedGroupFieldsList) {
- log.trace("fieldListItem: {}", fieldListItem);
- Collection gtNames = fieldToFieldWithContextMap.get(fieldListItem);
- if (gtNames == null || gtNames.isEmpty()) {
- log.trace("gtNames: {}", gtNames);
- log.trace("fieldToFieldWithContextMap: {} did not contain: {}", fieldToFieldWithContextMap, fieldListItem);
- } else {
- String gtName = gtNames.iterator().next();
- int idx = gtName.indexOf('.');
- if (idx != -1) {
- currentGroupingContext = gtName.substring(idx + 1);
- }
- if (!fieldListItem.equals(gtName)) {
- fieldToFieldWithContextMap.remove(fieldListItem, gtName);
- }
- log.trace("fieldToFieldWithContextMap now: {}", fieldToFieldWithContextMap);
- log.trace("gtName: {}", gtName);
- fieldCollection.add(fieldMap.get(gtName));
- }
- }
-
- if (fieldCollection.size() == expandedGroupFieldsList.size()) {
-
- // get the count out of the countKeyMap
- Integer count = countKeyMap.get("COUNT." + currentGroupingContext);
- if (count == null)
- count = 1;
- // see above comment about the COUNT field
- log.trace("adding {} of {} to counting map", count, fieldCollection);
- IntStream.range(0, count).forEach(j -> countingMap.add(fieldCollection));
- fieldVisibilities.put(fieldCollection, entry.getValue().getColumnVisibility());
- log.trace("put {} to {} into fieldVisibilities {}", fieldCollection, entry.getValue().getColumnVisibility(), fieldVisibilities);
- } else {
- log.trace("fieldList.size() != this.expandedGroupFieldsList.size()");
- log.trace("fieldList: {}", fieldCollection);
- log.trace("expandedGroupFieldsList: {}", expandedGroupFieldsList);
- }
- }
-
- log.trace("countingMap: {}", countingMap);
- }
-
- return new GroupingInfo(countingMap, fieldVisibilities);
- }
-
- private Multimap getFieldToFieldWithGroupingContextMap(Document d, Set expandedGroupFieldsList,
- Map> fieldMap, Set groupFieldsSet, Map reverseModelMapping) {
-
- Multimap fieldToFieldWithContextMap = TreeMultimap.create();
- for (Map.Entry>> entry : d.entrySet()) {
- Attribute> field = entry.getValue();
- log.trace("field is {}", field);
- String fieldName = entry.getKey();
- String shortName = fieldName;
- String shorterName = shortName;
- if (shortName.indexOf('.') != -1)
- shortName = shortName.substring(0, shortName.lastIndexOf('.'));
- if (shorterName.indexOf('.') != -1)
- shorterName = shorterName.substring(0, shorterName.indexOf('.'));
- log.trace("fieldName: {}, shortName: {}", fieldName, shortName);
- if (reverseModelMapping != null) {
- String finalName = reverseModelMapping.get(shorterName);
- if (finalName != null) {
- shortName = finalName + shortName.substring(shorterName.length());
- fieldName = finalName + fieldName.substring(shorterName.length());
- shorterName = finalName;
- }
- }
- if (groupFieldsSet.contains(shorterName)) {
- expandedGroupFieldsList.add(shortName);
- log.trace("{} contains {}", groupFieldsSet, shorterName);
-
- if (field.getData() instanceof Collection>) {
- // This handles multivalued entries that do not have grouping context
- // Create GroupingTypeAttribute and put in ordered map ordered on the attribute type
- SortedSetMultimap,GroupingTypeAttribute>> attrSortedMap = TreeMultimap.create();
- for (Object typeAttribute : ((Collection>) field.getData())) {
- Type> type = ((TypeAttribute>) typeAttribute).getType();
- GroupingTypeAttribute> created = new GroupingTypeAttribute<>(type, new Key(shortName), true);
- created.setColumnVisibility(field.getColumnVisibility());
- attrSortedMap.put(type, created);
- }
-
- // Add GroupingTypeAttribute to fieldMap with a grouping context that is based on ordered attribute type
- int i = 0;
- for (Map.Entry,GroupingTypeAttribute>> sortedEntry : attrSortedMap.entries()) {
- String fieldNameWithContext = fieldName + "." + i++;
- fieldMap.put(fieldNameWithContext, sortedEntry.getValue());
- fieldToFieldWithContextMap.put(shortName, fieldNameWithContext);
- }
- } else {
- GroupingTypeAttribute> created = new GroupingTypeAttribute<>((Type) field.getData(), new Key(shortName), true);
- created.setColumnVisibility(field.getColumnVisibility());
- fieldMap.put(fieldName, created);
- fieldToFieldWithContextMap.put(shortName, fieldName);
- }
- } else {
- log.trace("{} does not contain {}", groupFieldsSet, shorterName);
- }
- }
- log.trace("fieldMap: {}", fieldMap);
- log.trace("fields: {}", d.entrySet());
- log.trace("fieldToFieldWithGroupingContextMap: {}", fieldToFieldWithContextMap);
- log.trace("expandedGroupFieldsList: {}", expandedGroupFieldsList);
- return fieldToFieldWithContextMap;
- }
-
- private static int longestValueList(Multimap in) {
- int max = 0;
- for (Collection valueCollection : in.asMap().values()) {
- max = Math.max(max, valueCollection.size());
- }
- return max;
- }
-
- /**
- * Provides a clear way to return multiple things related to grouping that are generated from one method.
- */
- public static class GroupingInfo {
-
- private final GroupCountingHashMap countingMap;
-
- private final Multimap>,ColumnVisibility> fieldVisibilities;
-
- GroupingInfo(GroupCountingHashMap countingMap, Multimap>,ColumnVisibility> fieldVisibilities) {
- this.countingMap = countingMap;
- this.fieldVisibilities = fieldVisibilities;
- }
-
- public GroupCountingHashMap getCountsMap() {
- return countingMap;
- }
-
- public Multimap>,ColumnVisibility> getFieldVisibilities() {
- return fieldVisibilities;
- }
- }
-
- public static class GroupCountingHashMap extends HashMap>,Integer> {
-
- private static final Logger log = getLogger(GroupCountingHashMap.class);
-
- private MarkingFunctions markingFunctions;
-
- public GroupCountingHashMap(MarkingFunctions markingFunctions) {
- this.markingFunctions = markingFunctions;
- }
-
- public int add(Collection> in) {
- int count = 0;
- if (super.containsKey(in)) {
- count = super.get(in);
- // aggregate the visibilities
- combine(this.keySet(), in);
- }
- count++;
- super.put(in, count);
- return count;
- }
-
- private void combine(Set>> existingMapKeys, Collection extends Attribute>> incomingAttributes) {
-
- // for each Attribute in the incomingAttributes, find the existing map key attribute that matches its data.
- // combine the column visibilities of the incoming attribute and the existing one, and set
- // the column visibility of the EXISTING map key to the new value.
- // Note that the hashCode and equals methods for the GroupingTypeAttribute will ignore the metadata (which contains the column visibility)
- incomingAttributes.forEach(incomingAttribute -> {
- existingMapKeys.stream().flatMap(Collection::stream)
- // if the existing and incoming attributes are equal (other than the metadata), the incoming attribute's visibility will be
- // considered for merging into the existing attribute unless the column visibilities are already equal
- .filter(existingAttribute -> existingAttribute.getData().equals(incomingAttribute.getData())
- && !existingAttribute.getColumnVisibility().equals(incomingAttribute.getColumnVisibility()))
- .forEach(existingAttribute -> existingAttribute.setColumnVisibility(
- combine(Arrays.asList(existingAttribute.getColumnVisibility(), incomingAttribute.getColumnVisibility()))));
- });
- }
-
- private ColumnVisibility combine(Collection in) {
- try {
- ColumnVisibility columnVisibility = markingFunctions.combine(in);
- log.trace("combined {} into {}", in, columnVisibility);
- return columnVisibility;
- } catch (MarkingFunctions.Exception e) {
- log.warn("was unable to combine visibilities from {}", in);
- }
- return new ColumnVisibility();
- }
-
- }
-
- public static class GroupingTypeAttribute> extends TypeAttribute {
-
- public GroupingTypeAttribute(Type type, Key key, boolean toKeep) {
- super(type, key, toKeep);
- }
-
- @Override
- public boolean equals(Object o) {
- if (null == o) {
- return false;
- }
-
- if (o instanceof TypeAttribute) {
- TypeAttribute other = (TypeAttribute) o;
- return this.getType().equals(other.getType()) && (0 == this.compareMetadataRow(other));
- }
- return false;
- }
-
- private int compareMetadataRow(Attribute other) {
- if (this.isMetadataSet() != other.isMetadataSet()) {
- if (this.isMetadataSet()) {
- return 1;
- } else {
- return -1;
- }
- } else if (this.isMetadataSet()) {
- return this.metadata.compareRow(other.getMetadata().getRow());
- } else {
- return 0;
- }
- }
-
- @Override
- public int hashCode() {
- HashCodeBuilder hcb = new HashCodeBuilder(2099, 2129);
- hcb.append(getType().getDelegateAsString());
- return hcb.toHashCode();
- }
- }
-
-}
diff --git a/warehouse/query-core/src/main/java/datawave/query/common/grouping/GroupingUtils.java b/warehouse/query-core/src/main/java/datawave/query/common/grouping/GroupingUtils.java
new file mode 100644
index 0000000000..02e7a1b846
--- /dev/null
+++ b/warehouse/query-core/src/main/java/datawave/query/common/grouping/GroupingUtils.java
@@ -0,0 +1,267 @@
+package datawave.query.common.grouping;
+
+import static org.slf4j.LoggerFactory.getLogger;
+
+import java.math.BigDecimal;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.security.ColumnVisibility;
+import org.slf4j.Logger;
+
+import com.google.common.base.Preconditions;
+
+import datawave.data.type.NumberType;
+import datawave.marking.MarkingFunctions;
+import datawave.query.attributes.Document;
+import datawave.query.attributes.TypeAttribute;
+
+/**
+ * This class contains utility functions used by multiple classes for grouping operations.
+ */
+public class GroupingUtils {
+
+ public enum AverageAggregatorWriteFormat {
+ // How aggregated averages are written to documents: as the final average, or as separate numerator and divisor values.
+ AVERAGE, NUMERATOR_AND_DIVISOR
+ }
+
+ private static final Logger log = getLogger(GroupingUtils.class);
+
+    /**
+     * Returns a column visibility that results from the combination of all given visibilities using the given {@link MarkingFunctions}.
+     *
+     * @param visibilities
+     *            the visibilities to combine
+     * @param markingFunctions
+     *            the marking functions to combine the visibilities with
+     * @param failOnError
+     *            if true and the visibilities cannot be combined, an {@link IllegalArgumentException} will be thrown. If false and the visibilities cannot be
+     *            combined, it will be logged and a new, blank {@link ColumnVisibility} will be returned.
+     * @return the combined column visibility
+     */
+    public static ColumnVisibility combineVisibilities(Collection<ColumnVisibility> visibilities, MarkingFunctions markingFunctions, boolean failOnError) {
+        try {
+            return markingFunctions.combine(visibilities);
+        } catch (MarkingFunctions.Exception e) {
+            if (failOnError) {
+                // Rethrow with the original exception preserved as the cause.
+                throw new IllegalArgumentException("Unable to combine visibilities: " + visibilities, e);
+            } else {
+                log.warn("Unable to combine visibilities from {}", visibilities);
+            }
+        }
+        // Fall back to an empty (unrestricted) visibility when combining fails and failOnError is false.
+        return new ColumnVisibility();
+    }
+
+ /**
+ * Create and return a new {@link Document} with the given group information embedded into it.
+ *
+ * @param group
+ * the group
+ * @param keys
+ * the list of iterator keys that have been read
+ * @param markingFunctions
+ * the marking functions to use when combining column visibilities
+ * @param averageWriteFormat
+ * the format to use when writing aggregated averages to the document
+ * @return the new document
+ */
+ public static Document createDocument(Group group, List keys, MarkingFunctions markingFunctions, AverageAggregatorWriteFormat averageWriteFormat) {
+ Preconditions.checkState(!keys.isEmpty(), "No available keys for grouping results");
+
+ // Use the last (most recent) key so a new iterator will know where to start.
+ Key key = keys.get(keys.size() - 1);
+ Document document = new Document(key, true);
+
+ // Set the visibility for the document to the combined visibility of each previous document in which this grouping was seen in.
+ document.setColumnVisibility(combineVisibilities(group.getDocumentVisibilities(), markingFunctions, true));
+
+ // Add each of the grouping attributes to the document.
+ for (GroupingAttribute> attribute : group.getGrouping()) {
+ // Update the visibility to the combined visibilities of each visibility seen for this attribute in a grouping.
+ attribute.setColumnVisibility(combineVisibilities(group.getVisibilitiesForAttribute(attribute), markingFunctions, false));
+ document.put(attribute.getMetadata().getRow().toString(), attribute);
+ }
+
+ // Add an attribute for the count.
+ NumberType type = new NumberType();
+ type.setDelegate(new BigDecimal(group.getCount()));
+ TypeAttribute attr = new TypeAttribute<>(type, new Key("count"), true);
+ document.put("COUNT", attr);
+
+ // Add each aggregated field.
+ FieldAggregator fieldAggregator = group.getFieldAggregator();
+ if (fieldAggregator != null) {
+ Map