Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Star tree] Support for keyword changes as part of star tree mapper #16103

Draft
wants to merge 13 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ public class StarTreeMapperIT extends OpenSearchIntegTestCase {
.put(IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), new ByteSizeValue(512, ByteSizeUnit.MB))
.build();

private static XContentBuilder createMinimalTestMapping(boolean invalidDim, boolean invalidMetric, boolean keywordDim) {
private static XContentBuilder createMinimalTestMapping(boolean invalidDim, boolean invalidMetric, boolean ipdim) {
try {
return jsonBuilder().startObject()
.startObject("composite")
Expand All @@ -68,12 +68,15 @@ private static XContentBuilder createMinimalTestMapping(boolean invalidDim, bool
.endObject()
.startArray("ordered_dimensions")
.startObject()
.field("name", getDim(invalidDim, keywordDim))
.field("name", getDim(invalidDim, ipdim))
.endObject()
.startObject()
.field("name", "keyword_dv")
.endObject()
.endArray()
.startArray("metrics")
.startObject()
.field("name", getDim(invalidMetric, false))
.field("name", getMetric(invalidMetric, false))
.endObject()
.endArray()
.endObject()
Expand All @@ -99,6 +102,10 @@ private static XContentBuilder createMinimalTestMapping(boolean invalidDim, bool
.field("type", "keyword")
.field("doc_values", false)
.endObject()
.startObject("ip")
.field("type", "ip")
.field("doc_values", false)
.endObject()
.endObject()
.endObject();
} catch (IOException e) {
Expand Down Expand Up @@ -356,10 +363,19 @@ private XContentBuilder getMappingWithDuplicateFields(boolean isDuplicateDim, bo
}

private static String getDim(boolean hasDocValues, boolean isKeyword) {
if (hasDocValues) {
return random().nextBoolean() ? "numeric" : "keyword";
} else if (isKeyword) {
return "ip";
}
return "numeric_dv";
}

private static String getMetric(boolean hasDocValues, boolean isKeyword) {
if (hasDocValues) {
return "numeric";
} else if (isKeyword) {
return "keyword";
return "ip";
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why are we returning "ip" in the above two ifs?

}
return "numeric_dv";
}
Expand Down Expand Up @@ -398,6 +414,7 @@ public void testValidCompositeIndex() {
assertEquals(expectedTimeUnits.get(i).shortName(), dateDim.getSortedCalendarIntervals().get(i).shortName());
}
assertEquals("numeric_dv", starTreeFieldType.getDimensions().get(1).getField());
assertEquals("keyword_dv", starTreeFieldType.getDimensions().get(2).getField());
assertEquals("numeric_dv", starTreeFieldType.getMetrics().get(0).getField());
List<MetricStat> expectedMetrics = Arrays.asList(MetricStat.VALUE_COUNT, MetricStat.SUM, MetricStat.AVG);
assertEquals(expectedMetrics, starTreeFieldType.getMetrics().get(0).getMetrics());
Expand Down Expand Up @@ -665,10 +682,7 @@ public void testInvalidDimCompositeIndex() {
IllegalArgumentException.class,
() -> prepareCreate(TEST_INDEX).setSettings(settings).setMapping(createMinimalTestMapping(true, false, false)).get()
);
assertEquals(
"Aggregations not supported for the dimension field [numeric] with field type [integer] as part of star tree field",
ex.getMessage()
);
assertTrue(ex.getMessage().startsWith("Aggregations not supported for the dimension field "));
}

public void testMaxDimsCompositeIndex() {
Expand Down Expand Up @@ -734,7 +748,7 @@ public void testUnsupportedDim() {
() -> prepareCreate(TEST_INDEX).setSettings(settings).setMapping(createMinimalTestMapping(false, false, true)).get()
);
assertEquals(
"Failed to parse mapping [_doc]: unsupported field type associated with dimension [keyword] as part of star tree field [startree-1]",
"Failed to parse mapping [_doc]: unsupported field type associated with dimension [ip] as part of star tree field [startree-1]",
ex.getMessage()
);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.IndexOutput;
import org.opensearch.common.annotation.ExperimentalApi;
Expand All @@ -34,6 +35,7 @@
import org.opensearch.index.compositeindex.datacube.startree.index.StarTreeValues;
import org.opensearch.index.mapper.CompositeMappedFieldType;
import org.opensearch.index.mapper.DocCountFieldMapper;
import org.opensearch.index.mapper.KeywordFieldMapper;
import org.opensearch.index.mapper.MapperService;
import org.opensearch.index.mapper.StarTreeMapper;

Expand Down Expand Up @@ -82,14 +84,7 @@ public Composite99DocValuesWriter(DocValuesConsumer delegate, SegmentWriteState
this.compositeMappedFieldTypes = mapperService.getCompositeFieldTypes();
compositeFieldSet = new HashSet<>();
segmentFieldSet = new HashSet<>();
// TODO : add integ test for this
for (FieldInfo fi : this.state.fieldInfos) {
if (DocValuesType.SORTED_NUMERIC.equals(fi.getDocValuesType())) {
segmentFieldSet.add(fi.name);
} else if (fi.name.equals(DocCountFieldMapper.NAME)) {
segmentFieldSet.add(fi.name);
}
}
addStarTreeSupportedFieldsFromSegment();
for (CompositeMappedFieldType type : compositeMappedFieldTypes) {
compositeFieldSet.addAll(type.fields());
}
Expand Down Expand Up @@ -148,6 +143,19 @@ public Composite99DocValuesWriter(DocValuesConsumer delegate, SegmentWriteState
segmentHasCompositeFields = Collections.disjoint(segmentFieldSet, compositeFieldSet) == false;
}

private void addStarTreeSupportedFieldsFromSegment() {
// TODO : add integ test for this
for (FieldInfo fi : this.state.fieldInfos) {
if (DocValuesType.SORTED_NUMERIC.equals(fi.getDocValuesType())) {
segmentFieldSet.add(fi.name);
} else if (DocValuesType.SORTED_SET.equals(fi.getDocValuesType())) {
segmentFieldSet.add(fi.name);
} else if (fi.name.equals(DocCountFieldMapper.NAME)) {
segmentFieldSet.add(fi.name);
}
}
}

@Override
public void addNumericField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
delegate.addNumericField(field, valuesProducer);
Expand Down Expand Up @@ -179,6 +187,10 @@ public void addSortedNumericField(FieldInfo field, DocValuesProducer valuesProdu
@Override
public void addSortedSetField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
delegate.addSortedSetField(field, valuesProducer);
// Perform this only during flush flow
if (mergeState.get() == null && segmentHasCompositeFields) {
createCompositeIndicesIfPossible(valuesProducer, field);
}
}

@Override
Expand Down Expand Up @@ -235,6 +247,7 @@ private void createCompositeIndicesIfPossible(DocValuesProducer valuesProducer,
* Add empty doc values for fields not present in segment
*/
private void addDocValuesForEmptyField(String compositeField) {
// special case for doc count
if (compositeField.equals(DocCountFieldMapper.NAME)) {
fieldProducerMap.put(compositeField, new EmptyDocValuesProducer() {
@Override
Expand All @@ -243,16 +256,29 @@ public NumericDocValues getNumeric(FieldInfo field) {
}
});
} else {
fieldProducerMap.put(compositeField, new EmptyDocValuesProducer() {
@Override
public SortedNumericDocValues getSortedNumeric(FieldInfo field) {
return DocValues.emptySortedNumeric();
}
});
if(isSortedSetField(compositeField)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should we have a switch? for when we support more field types.

fieldProducerMap.put(compositeField, new EmptyDocValuesProducer() {
@Override
public SortedSetDocValues getSortedSet(FieldInfo field) {
return DocValues.emptySortedSet();
}
});
} else {
fieldProducerMap.put(compositeField, new EmptyDocValuesProducer() {
@Override
public SortedNumericDocValues getSortedNumeric(FieldInfo field) {
return DocValues.emptySortedNumeric();
}
});
}
}
compositeFieldSet.remove(compositeField);
}

private boolean isSortedSetField(String field) {
return mapperService.fieldType(field) instanceof KeywordFieldMapper.KeywordFieldType;
}

@Override
public void merge(MergeState mergeState) throws IOException {
this.mergeState.compareAndSet(null, mergeState);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import java.util.stream.Collectors;

import static org.opensearch.index.compositeindex.datacube.DateDimension.CALENDAR_INTERVALS;
import static org.opensearch.index.compositeindex.datacube.KeywordDimension.KEYWORD;

/**
* Dimension factory class mainly used to parse and create dimension from the mappings
Expand All @@ -43,6 +44,8 @@ public static Dimension parseAndCreateDimension(
return parseAndCreateDateDimension(name, dimensionMap, c);
case NumericDimension.NUMERIC:
return new NumericDimension(name);
case KEYWORD:
return new KeywordDimension(name);
default:
throw new IllegalArgumentException(
String.format(Locale.ROOT, "unsupported field type associated with dimension [%s] as part of star tree field", name)
Expand All @@ -56,16 +59,23 @@ public static Dimension parseAndCreateDimension(
Map<String, Object> dimensionMap,
Mapper.TypeParser.ParserContext c
) {
if (builder.getSupportedDataCubeDimensionType().isPresent()
&& builder.getSupportedDataCubeDimensionType().get().equals(DimensionType.DATE)) {
return parseAndCreateDateDimension(name, dimensionMap, c);
} else if (builder.getSupportedDataCubeDimensionType().isPresent()
&& builder.getSupportedDataCubeDimensionType().get().equals(DimensionType.NUMERIC)) {
if (builder.getSupportedDataCubeDimensionType().isEmpty()) {
throw new IllegalArgumentException(
String.format(Locale.ROOT, "unsupported field type associated with star tree dimension [%s]", name)
);
}
switch (builder.getSupportedDataCubeDimensionType().get()) {
case DATE:
return parseAndCreateDateDimension(name, dimensionMap, c);
case NUMERIC:
return new NumericDimension(name);
}
throw new IllegalArgumentException(
String.format(Locale.ROOT, "unsupported field type associated with star tree dimension [%s]", name)
);
case KEYWORD:
return new KeywordDimension(name);
default:
throw new IllegalArgumentException(
String.format(Locale.ROOT, "unsupported field type associated with star tree dimension [%s]", name)
);
}
}

private static DateDimension parseAndCreateDateDimension(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,5 +27,11 @@ public enum DimensionType {
* Represents a date dimension type.
* This is used for dimensions that contain date or timestamp values.
*/
DATE
DATE,

/**
* Represents a keyword dimension type.
* This is used for dimensions that contain keyword ordinals.
*/
KEYWORD
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.index.compositeindex.datacube;

import org.opensearch.common.annotation.ExperimentalApi;
import org.opensearch.core.xcontent.XContentBuilder;
import org.opensearch.index.mapper.CompositeDataCubeFieldType;

import java.io.IOException;
import java.util.List;
import java.util.Objects;

/**
* Composite index keyword dimension class
*
* @opensearch.experimental
*/
@ExperimentalApi
public class KeywordDimension implements Dimension {
public static final String KEYWORD = "keyword";
private final String field;

public KeywordDimension(String field) {
this.field = field;
}

@Override
public String getField() {
return field;
}

@Override
public int getNumSubDimensions() {
return 1;
}

@Override
public int setDimensionValues(Long value, Long[] dims, int index) {
dims[index++] = value;
return index;
}

@Override
public List<String> getDimensionFieldsNames() {
return List.of(field);
}

@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject();
builder.field(CompositeDataCubeFieldType.NAME, field);
builder.field(CompositeDataCubeFieldType.TYPE, KEYWORD);
builder.endObject();
return builder;
}

@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
KeywordDimension dimension = (KeywordDimension) o;
return Objects.equals(field, dimension.getField());
}

@Override
public int hashCode() {
return Objects.hash(field);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RandomAccessInput;
import org.opensearch.common.annotation.ExperimentalApi;
import org.opensearch.common.util.io.IOUtils;
import org.opensearch.index.compositeindex.datacube.startree.StarTreeDocument;
import org.opensearch.index.compositeindex.datacube.startree.StarTreeField;
Expand Down Expand Up @@ -45,7 +46,9 @@
* <p>The set of 'star-tree.documents' files is maintained, and a tracker array is used to keep track of the start document ID for each file.
* Once the number of files reaches a set threshold, the files are merged.
*
* @opensearch.experimental
*/
@ExperimentalApi
public class StarTreeDocsFileManager extends AbstractDocumentsFileManager implements Closeable {
private static final Logger logger = LogManager.getLogger(StarTreeDocsFileManager.class);
private static final String STAR_TREE_DOC_FILE_NAME = "star-tree.documents";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
import org.opensearch.core.xcontent.XContentParser;
import org.opensearch.index.analysis.IndexAnalyzers;
import org.opensearch.index.analysis.NamedAnalyzer;
import org.opensearch.index.compositeindex.datacube.DimensionType;
import org.opensearch.index.fielddata.IndexFieldData;
import org.opensearch.index.fielddata.plain.SortedSetOrdinalsIndexFieldData;
import org.opensearch.index.query.QueryShardContext;
Expand All @@ -73,6 +74,7 @@
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.function.Supplier;

import static org.opensearch.search.SearchService.ALLOW_EXPENSIVE_QUERIES;
Expand Down Expand Up @@ -254,6 +256,11 @@ public KeywordFieldMapper build(BuilderContext context) {
this
);
}

@Override
public Optional<DimensionType> getSupportedDataCubeDimensionType() {
return Optional.of(DimensionType.KEYWORD);
}
}

public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.getIndexAnalyzers()));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4264,9 +4264,15 @@ public void testFlushFlowWithTimestamps() throws IOException {

compositeField = getStarTreeFieldWithDateDimension();
SortedNumericStarTreeValuesIterator d1sndv = new SortedNumericStarTreeValuesIterator(getSortedNumericMock(dimList, docsWithField));
SortedNumericStarTreeValuesIterator d2sndv = new SortedNumericStarTreeValuesIterator(getSortedNumericMock(dimList2, docsWithField2));
SortedNumericStarTreeValuesIterator m1sndv = new SortedNumericStarTreeValuesIterator(getSortedNumericMock(metricsList, metricsWithField));
SortedNumericStarTreeValuesIterator m2sndv = new SortedNumericStarTreeValuesIterator(getSortedNumericMock(metricsList, metricsWithField));
SortedNumericStarTreeValuesIterator d2sndv = new SortedNumericStarTreeValuesIterator(
getSortedNumericMock(dimList2, docsWithField2)
);
SortedNumericStarTreeValuesIterator m1sndv = new SortedNumericStarTreeValuesIterator(
getSortedNumericMock(metricsList, metricsWithField)
);
SortedNumericStarTreeValuesIterator m2sndv = new SortedNumericStarTreeValuesIterator(
getSortedNumericMock(metricsList, metricsWithField)
);
this.docValuesConsumer = LuceneDocValuesConsumerFactory.getDocValuesConsumerForCompositeCodec(
writeState,
Composite99DocValuesFormat.DATA_DOC_VALUES_CODEC,
Expand Down
Loading
Loading