Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Backport 2.x] Neural sparse query two-phase search processor's bwc test #831

Merged
merged 1 commit into from
Jul 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Fix for missing HybridQuery results when concurrent segment search is enabled ([#800](https://github.com/opensearch-project/neural-search/pull/800))
### Infrastructure
- Add BWC for batch ingestion ([#769](https://github.com/opensearch-project/neural-search/pull/769))
- Add backward compatibility test cases for neural sparse two-phase processor ([#777](https://github.com/opensearch-project/neural-search/pull/777))
### Documentation
### Maintenance
### Refactoring
14 changes: 14 additions & 0 deletions qa/restart-upgrade/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,13 @@ task testAgainstOldCluster(type: StandaloneRestIntegTestTask) {
}
}

// Exclude the NeuralSparseQuery two-phase search pipeline tests when the BWC version is 2.9 through 2.14,
// because the feature was only introduced in 2.15.
if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12") || ext.neural_search_bwc_version.startsWith("2.13") || ext.neural_search_bwc_version.startsWith("2.14")){
filter {
// The trailing ".*" matches every test method of the class.
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseTwoPhaseProcessorIT.*"
}
}

nonInputProperties.systemProperty('tests.rest.cluster', "${-> testClusters."${baseName}".allHttpSocketURI.join(",")}")
nonInputProperties.systemProperty('tests.clustername', "${-> testClusters."${baseName}".getName()}")
systemProperty 'tests.security.manager', 'false'
Expand Down Expand Up @@ -155,6 +162,13 @@ task testAgainstNewCluster(type: StandaloneRestIntegTestTask) {
}
}

// Exclude the NeuralSparseQuery two-phase search pipeline tests when the BWC version is 2.9 through 2.14,
// because the feature was only introduced in 2.15.
if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12") || ext.neural_search_bwc_version.startsWith("2.13") || ext.neural_search_bwc_version.startsWith("2.14")){
filter {
// The trailing ".*" matches every test method of the class.
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseTwoPhaseProcessorIT.*"
}
}

nonInputProperties.systemProperty('tests.rest.cluster', "${-> testClusters."${baseName}".allHttpSocketURI.join(",")}")
nonInputProperties.systemProperty('tests.clustername', "${-> testClusters."${baseName}".getName()}")
systemProperty 'tests.security.manager', 'false'
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/
package org.opensearch.neuralsearch.bwc;

import org.opensearch.common.settings.Settings;
import org.opensearch.neuralsearch.query.NeuralSparseQueryBuilder;
import org.opensearch.neuralsearch.util.TestUtils;

import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;

import static org.opensearch.neuralsearch.util.TestUtils.NODES_BWC_CLUSTER;
import static org.opensearch.neuralsearch.util.TestUtils.SPARSE_ENCODING_PROCESSOR;

public class NeuralSparseTwoPhaseProcessorIT extends AbstractRestartUpgradeRestTestCase {

private static final String NEURAL_SPARSE_INGEST_PIPELINE_NAME = "nstp-nlp-ingest-pipeline-dense";
private static final String NEURAL_SPARSE_TWO_PHASE_SEARCH_PIPELINE_NAME = "nstp-nlp-two-phase-search-pipeline-sparse";
private static final String TEST_ENCODING_FIELD = "passage_embedding";
private static final String TEST_TEXT_FIELD = "passage_text";
private static final String TEXT_1 = "Hello world a b";

public void testNeuralSparseQueryTwoPhaseProcessor_NeuralSearch_E2EFlow() throws Exception {
waitForClusterHealthGreen(NODES_BWC_CLUSTER);
NeuralSparseQueryBuilder neuralSparseQueryBuilder = new NeuralSparseQueryBuilder().fieldName(TEST_ENCODING_FIELD).queryText(TEXT_1);
if (isRunningAgainstOldCluster()) {
String modelId = uploadSparseEncodingModel();
loadModel(modelId);
neuralSparseQueryBuilder.modelId(modelId);
createPipelineForSparseEncodingProcessor(modelId, NEURAL_SPARSE_INGEST_PIPELINE_NAME);
createIndexWithConfiguration(
getIndexNameForTest(),
Files.readString(Path.of(classLoader.getResource("processor/SparseIndexMappings.json").toURI())),
NEURAL_SPARSE_INGEST_PIPELINE_NAME
);
addSparseEncodingDoc(getIndexNameForTest(), "0", List.of(), List.of(), List.of(TEST_TEXT_FIELD), List.of(TEXT_1));
createNeuralSparseTwoPhaseSearchProcessor(NEURAL_SPARSE_TWO_PHASE_SEARCH_PIPELINE_NAME);
updateIndexSettings(
getIndexNameForTest(),
Settings.builder().put("index.search.default_pipeline", NEURAL_SPARSE_TWO_PHASE_SEARCH_PIPELINE_NAME)
);
Object resultWith2PhasePipeline = search(getIndexNameForTest(), neuralSparseQueryBuilder, 1).get("hits");
assertNotNull(resultWith2PhasePipeline);
} else {
String modelId = null;
try {
modelId = TestUtils.getModelId(getIngestionPipeline(NEURAL_SPARSE_INGEST_PIPELINE_NAME), SPARSE_ENCODING_PROCESSOR);
loadModel(modelId);
neuralSparseQueryBuilder.modelId(modelId);
Object resultWith2PhasePipeline = search(getIndexNameForTest(), neuralSparseQueryBuilder, 1).get("hits");
assertNotNull(resultWith2PhasePipeline);
} finally {
wipeOfTestResources(
getIndexNameForTest(),
NEURAL_SPARSE_INGEST_PIPELINE_NAME,
modelId,
NEURAL_SPARSE_TWO_PHASE_SEARCH_PIPELINE_NAME
);
}
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{
"request_processors": [
{
"neural_sparse_two_phase_processor": {
"tag": "neural-sparse",
"description": "This processor is making two-phase rescorer.",
"enabled": true,
"two_phase_parameter": {
"prune_ratio": %f,
"expansion_rate": %f,
"max_window_size": %d
}
}
}
]
}
39 changes: 39 additions & 0 deletions qa/rolling-upgrade/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,16 @@ task testAgainstOldCluster(type: StandaloneRestIntegTestTask) {
}
}

// Exclude the neural sparse two-phase processor tests when the BWC version is 2.9 through 2.14,
// because the feature was only introduced in 2.15.
if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10")
|| ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12")
|| ext.neural_search_bwc_version.startsWith("2.13") || ext.neural_search_bwc_version.startsWith("2.14")){
filter {
// The trailing ".*" matches every test method of the class.
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseTwoPhaseProcessorIT.*"
}
}


nonInputProperties.systemProperty('tests.rest.cluster', "${-> testClusters."${baseName}".allHttpSocketURI.join(",")}")
nonInputProperties.systemProperty('tests.clustername', "${-> testClusters."${baseName}".getName()}")
systemProperty 'tests.security.manager', 'false'
Expand Down Expand Up @@ -156,6 +166,16 @@ task testAgainstOneThirdUpgradedCluster(type: StandaloneRestIntegTestTask) {
}
}

// Exclude the neural sparse two-phase processor tests when the BWC version is 2.9 through 2.14,
// because the feature was only introduced in 2.15.
if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10")
|| ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12")
|| ext.neural_search_bwc_version.startsWith("2.13") || ext.neural_search_bwc_version.startsWith("2.14")){
filter {
// The trailing ".*" matches every test method of the class.
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseTwoPhaseProcessorIT.*"
}
}


nonInputProperties.systemProperty('tests.rest.cluster', "${-> testClusters."${baseName}".allHttpSocketURI.join(",")}")
nonInputProperties.systemProperty('tests.clustername', "${-> testClusters."${baseName}".getName()}")
systemProperty 'tests.security.manager', 'false'
Expand Down Expand Up @@ -213,6 +233,16 @@ task testAgainstTwoThirdsUpgradedCluster(type: StandaloneRestIntegTestTask) {
}
}

// Exclude the neural sparse two-phase processor tests when the BWC version is 2.9 through 2.14,
// because the feature was only introduced in 2.15.
if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10")
|| ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12")
|| ext.neural_search_bwc_version.startsWith("2.13") || ext.neural_search_bwc_version.startsWith("2.14")){
filter {
// The trailing ".*" matches every test method of the class.
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseTwoPhaseProcessorIT.*"
}
}


nonInputProperties.systemProperty('tests.rest.cluster', "${-> testClusters."${baseName}".allHttpSocketURI.join(",")}")
nonInputProperties.systemProperty('tests.clustername', "${-> testClusters."${baseName}".getName()}")
systemProperty 'tests.security.manager', 'false'
Expand Down Expand Up @@ -270,6 +300,15 @@ task testRollingUpgrade(type: StandaloneRestIntegTestTask) {
}
}

// Exclude the neural sparse two-phase processor tests when the BWC version is 2.9 through 2.14,
// because the feature was only introduced in 2.15.
if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10")
|| ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12")
|| ext.neural_search_bwc_version.startsWith("2.13") || ext.neural_search_bwc_version.startsWith("2.14")){
filter {
// The trailing ".*" matches every test method of the class.
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseTwoPhaseProcessorIT.*"
}
}

nonInputProperties.systemProperty('tests.rest.cluster', "${-> testClusters."${baseName}".allHttpSocketURI.join(",")}")
nonInputProperties.systemProperty('tests.clustername', "${-> testClusters."${baseName}".getName()}")
systemProperty 'tests.security.manager', 'false'
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/
package org.opensearch.neuralsearch.bwc;

import org.opensearch.common.settings.Settings;
import org.opensearch.neuralsearch.query.NeuralSparseQueryBuilder;
import org.opensearch.neuralsearch.util.TestUtils;

import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;

import static org.opensearch.neuralsearch.util.TestUtils.NODES_BWC_CLUSTER;
import static org.opensearch.neuralsearch.util.TestUtils.SPARSE_ENCODING_PROCESSOR;

public class NeuralSparseTwoPhaseProcessorIT extends AbstractRollingUpgradeTestCase {
// add prefix to avoid conflicts with other IT class, since don't wipe resources after first round
private static final String SPARSE_INGEST_PIPELINE_NAME = "nstp-nlp-ingest-pipeline-sparse";
private static final String SPARSE_SEARCH_TWO_PHASE_PIPELINE_NAME = "nstp-nlp-two-phase-search-pipeline-sparse";
private static final String TEST_ENCODING_FIELD = "passage_embedding";
private static final String TEST_TEXT_FIELD = "passage_text";
private static final String TEXT_1 = "Hello world a b";
private String sparseModelId = "";

// test of NeuralSparseTwoPhaseProcessor supports neural_sparse query's two phase speed up
// the feature is introduced from 2.15
public void testNeuralSparseTwoPhaseProcessorIT_NeuralSparseSearch_E2EFlow() throws Exception {
waitForClusterHealthGreen(NODES_BWC_CLUSTER);
// will set the model_id after we obtain the id
NeuralSparseQueryBuilder neuralSparseQueryBuilder = new NeuralSparseQueryBuilder().fieldName(TEST_ENCODING_FIELD).queryText(TEXT_1);

switch (getClusterType()) {
case OLD:
sparseModelId = uploadSparseEncodingModel();
loadModel(sparseModelId);
neuralSparseQueryBuilder.modelId(sparseModelId);
createPipelineForSparseEncodingProcessor(sparseModelId, SPARSE_INGEST_PIPELINE_NAME);
createIndexWithConfiguration(
getIndexNameForTest(),
Files.readString(Path.of(classLoader.getResource("processor/SparseIndexMappings.json").toURI())),
SPARSE_INGEST_PIPELINE_NAME
);
addSparseEncodingDoc(getIndexNameForTest(), "0", List.of(), List.of(), List.of(TEST_TEXT_FIELD), List.of(TEXT_1));
createNeuralSparseTwoPhaseSearchProcessor(SPARSE_SEARCH_TWO_PHASE_PIPELINE_NAME);
updateIndexSettings(
getIndexNameForTest(),
Settings.builder().put("index.search.default_pipeline", SPARSE_SEARCH_TWO_PHASE_PIPELINE_NAME)
);
assertNotNull(search(getIndexNameForTest(), neuralSparseQueryBuilder, 1).get("hits"));
break;
case MIXED:
sparseModelId = TestUtils.getModelId(getIngestionPipeline(SPARSE_INGEST_PIPELINE_NAME), SPARSE_ENCODING_PROCESSOR);
loadModel(sparseModelId);
neuralSparseQueryBuilder.modelId(sparseModelId);
assertNotNull(search(getIndexNameForTest(), neuralSparseQueryBuilder, 1).get("hits"));
break;
case UPGRADED:
try {
sparseModelId = TestUtils.getModelId(getIngestionPipeline(SPARSE_INGEST_PIPELINE_NAME), SPARSE_ENCODING_PROCESSOR);
loadModel(sparseModelId);
neuralSparseQueryBuilder.modelId(sparseModelId);
assertNotNull(search(getIndexNameForTest(), neuralSparseQueryBuilder, 1).get("hits"));
} finally {
wipeOfTestResources(
getIndexNameForTest(),
SPARSE_INGEST_PIPELINE_NAME,
sparseModelId,
SPARSE_SEARCH_TWO_PHASE_PIPELINE_NAME
);
}
break;
default:
throw new IllegalStateException("Unexpected value: " + getClusterType());
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{
"request_processors": [
{
"neural_sparse_two_phase_processor": {
"tag": "neural-sparse",
"description": "This processor is making two-phase rescorer.",
"enabled": true,
"two_phase_parameter": {
"prune_ratio": %f,
"expansion_rate": %f,
"max_window_size": %d
}
}
}
]
}
Loading