diff --git a/libs/core/src/main/java/org/opensearch/core/common/transport/TransportAddress.java b/libs/core/src/main/java/org/opensearch/core/common/transport/TransportAddress.java index 3b5fbb7d76307..b7ab988ce0404 100644 --- a/libs/core/src/main/java/org/opensearch/core/common/transport/TransportAddress.java +++ b/libs/core/src/main/java/org/opensearch/core/common/transport/TransportAddress.java @@ -162,4 +162,14 @@ public String toString() { public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { return builder.value(toString()); } + + public static TransportAddress fromString(String address) throws UnknownHostException { + String[] addressSplit = address.split(":"); + if (addressSplit.length != 2) { + throw new IllegalArgumentException("address must be of the form [hostname/ip]:[port]"); + } + String hostname = addressSplit[0]; + int port = Integer.parseInt(addressSplit[1]); + return new TransportAddress(InetAddress.getByName(hostname), port); + } } diff --git a/libs/core/src/main/java/org/opensearch/core/xcontent/XContentParserUtils.java b/libs/core/src/main/java/org/opensearch/core/xcontent/XContentParserUtils.java index b10be393f9adb..e0128a036148e 100644 --- a/libs/core/src/main/java/org/opensearch/core/xcontent/XContentParserUtils.java +++ b/libs/core/src/main/java/org/opensearch/core/xcontent/XContentParserUtils.java @@ -38,6 +38,8 @@ import org.opensearch.core.xcontent.XContentParser.Token; import java.io.IOException; +import java.util.ArrayList; +import java.util.List; import java.util.Locale; import java.util.function.Consumer; @@ -178,4 +180,14 @@ public static void parseTypedKeysObject(XContentParser parser, String delimi throw new ParsingException(parser.getTokenLocation(), "Failed to parse object: empty key"); } } + + public static List parseStringList(XContentParser parser) throws IOException { + List valueList = new ArrayList<>(); + ensureExpectedToken(Token.START_ARRAY, parser.currentToken(), parser); + while (parser.nextToken() != Token.END_ARRAY) { + ensureExpectedToken(Token.VALUE_STRING, parser.currentToken(), parser); + valueList.add(parser.text()); + } + return valueList; + } } diff --git a/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateCleanupManagerIT.java b/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateCleanupManagerIT.java new file mode 100644 index 0000000000000..b9dac874ed0c0 --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateCleanupManagerIT.java @@ -0,0 +1,139 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.gateway.remote; + +import org.opensearch.action.admin.cluster.settings.ClusterUpdateSettingsResponse; +import org.opensearch.common.blobstore.BlobPath; +import org.opensearch.common.settings.Settings; +import org.opensearch.remotestore.RemoteStoreBaseIntegTestCase; +import org.opensearch.repositories.RepositoriesService; +import org.opensearch.repositories.blobstore.BlobStoreRepository; +import org.opensearch.test.OpenSearchIntegTestCase; + +import java.nio.charset.StandardCharsets; +import java.util.Base64; +import java.util.Map; +import java.util.concurrent.TimeUnit; + +import static org.opensearch.gateway.remote.RemoteClusterStateCleanupManager.CLUSTER_STATE_CLEANUP_INTERVAL_DEFAULT; +import static org.opensearch.gateway.remote.RemoteClusterStateCleanupManager.REMOTE_CLUSTER_STATE_CLEANUP_INTERVAL_SETTING; +import static org.opensearch.gateway.remote.RemoteClusterStateCleanupManager.RETAINED_MANIFESTS; +import static org.opensearch.gateway.remote.RemoteClusterStateCleanupManager.SKIP_CLEANUP_STATE_CHANGES; +import static org.opensearch.gateway.remote.RemoteClusterStateService.REMOTE_CLUSTER_STATE_ENABLED_SETTING; +import static org.opensearch.indices.IndicesService.CLUSTER_DEFAULT_INDEX_REFRESH_INTERVAL_SETTING; + +@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) +public class RemoteClusterStateCleanupManagerIT extends RemoteStoreBaseIntegTestCase { + + private static final String INDEX_NAME = "test-index"; + + @Override + protected Settings nodeSettings(int nodeOrdinal) { + return Settings.builder().put(super.nodeSettings(nodeOrdinal)).put(REMOTE_CLUSTER_STATE_ENABLED_SETTING.getKey(), true).build(); + } + + private Map initialTestSetup(int shardCount, int replicaCount, int dataNodeCount, int clusterManagerNodeCount) { + prepareCluster(clusterManagerNodeCount, dataNodeCount, INDEX_NAME, replicaCount, shardCount); + Map indexStats = indexData(1, false, INDEX_NAME); + assertEquals(shardCount * (replicaCount + 1), getNumShards(INDEX_NAME).totalNumShards); + ensureGreen(INDEX_NAME); + return indexStats; + } + + public void testRemoteCleanupTaskUpdated() { + int shardCount = randomIntBetween(1, 2); + int replicaCount = 1; + int dataNodeCount = shardCount * (replicaCount + 1); + int clusterManagerNodeCount = 1; + + initialTestSetup(shardCount, replicaCount, dataNodeCount, clusterManagerNodeCount); + RemoteClusterStateCleanupManager remoteClusterStateCleanupManager = internalCluster().getClusterManagerNodeInstance( + RemoteClusterStateCleanupManager.class + ); + + assertEquals(CLUSTER_STATE_CLEANUP_INTERVAL_DEFAULT, remoteClusterStateCleanupManager.getStaleFileDeletionTask().getInterval()); + assertTrue(remoteClusterStateCleanupManager.getStaleFileDeletionTask().isScheduled()); + + // now disable + client().admin() + .cluster() + .prepareUpdateSettings() + .setPersistentSettings(Settings.builder().put(REMOTE_CLUSTER_STATE_CLEANUP_INTERVAL_SETTING.getKey(), -1)) + .get(); + + assertEquals(-1, remoteClusterStateCleanupManager.getStaleFileDeletionTask().getInterval().getMillis()); + assertFalse(remoteClusterStateCleanupManager.getStaleFileDeletionTask().isScheduled()); + + // now set Clean up interval to 1 min + client().admin() + .cluster() + .prepareUpdateSettings() + .setPersistentSettings(Settings.builder().put(REMOTE_CLUSTER_STATE_CLEANUP_INTERVAL_SETTING.getKey(), "1m")) + .get(); + assertEquals(1, remoteClusterStateCleanupManager.getStaleFileDeletionTask().getInterval().getMinutes()); + } + + public void testRemoteCleanupDeleteStale() throws Exception { + int shardCount = randomIntBetween(1, 2); + int replicaCount = 1; + int dataNodeCount = shardCount * (replicaCount + 1); + int clusterManagerNodeCount = 1; + + initialTestSetup(shardCount, replicaCount, dataNodeCount, clusterManagerNodeCount); + + // set cleanup interval to 100 ms to make the test faster + ClusterUpdateSettingsResponse response = client().admin() + .cluster() + .prepareUpdateSettings() + .setPersistentSettings(Settings.builder().put(REMOTE_CLUSTER_STATE_CLEANUP_INTERVAL_SETTING.getKey(), "100ms")) + .get(); + + assertTrue(response.isAcknowledged()); + + // update cluster state 21 times to ensure that clean up has run after this will upload 42 manifest files + // to repository, if manifest files are less than that it means clean up has run + updateClusterStateNTimes(RETAINED_MANIFESTS + SKIP_CLEANUP_STATE_CHANGES + 1); + + RepositoriesService repositoriesService = internalCluster().getClusterManagerNodeInstance(RepositoriesService.class); + BlobStoreRepository repository = (BlobStoreRepository) repositoriesService.repository(REPOSITORY_NAME); + BlobPath baseMetadataPath = repository.basePath() + .add( + Base64.getUrlEncoder() + .withoutPadding() + .encodeToString(getClusterState().getClusterName().value().getBytes(StandardCharsets.UTF_8)) + ) + .add("cluster-state") + .add(getClusterState().metadata().clusterUUID()); + BlobPath manifestContainerPath = baseMetadataPath.add("manifest"); + + assertBusy(() -> { + int manifestFiles = repository.blobStore().blobContainer(manifestContainerPath).listBlobsByPrefix("manifest").size(); + logger.info("number of current manifest file: {}", manifestFiles); + // we can't guarantee that we have same number of manifest as Retained manifest in our repo as there can be other queued task + // other than replica count change which can upload new manifest files, that's why we check that number of manifests is between + // Retained manifests and Retained manifests + 2 * Skip cleanup state changes (each cluster state update uploads 2 manifests) + assertTrue( + "Current number of manifest files: " + manifestFiles, + manifestFiles >= RETAINED_MANIFESTS && manifestFiles < RETAINED_MANIFESTS + 2 * SKIP_CLEANUP_STATE_CHANGES + ); + }, 500, TimeUnit.MILLISECONDS); + } + + private void updateClusterStateNTimes(int n) { + int newReplicaCount = randomIntBetween(0, 3); + for (int i = n; i > 0; i--) { + ClusterUpdateSettingsResponse response = client().admin() + .cluster() + .prepareUpdateSettings() + .setPersistentSettings(Settings.builder().put(CLUSTER_DEFAULT_INDEX_REFRESH_INTERVAL_SETTING.getKey(), i, TimeUnit.SECONDS)) + .get(); + assertTrue(response.isAcknowledged()); + } + } +} diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIntegTestCase.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIntegTestCase.java index 3899c8a80f442..52b0980f5cd45 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIntegTestCase.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIntegTestCase.java @@ -311,4 +311,14 @@ protected void restore(boolean restoreAllShards, String... indices) { PlainActionFuture.newFuture() ); } + + protected void prepareCluster(int numClusterManagerNodes, int numDataOnlyNodes, String indices, int replicaCount, int shardCount) { + internalCluster().startClusterManagerOnlyNodes(numClusterManagerNodes); + internalCluster().startDataOnlyNodes(numDataOnlyNodes); + for (String index : indices.split(",")) { + createIndex(index, remoteStoreIndexSettings(replicaCount, shardCount)); + ensureYellowAndNoInitializingShards(index); + ensureGreen(index); + } + } } diff --git a/server/src/main/java/org/opensearch/cluster/ClusterState.java b/server/src/main/java/org/opensearch/cluster/ClusterState.java index 9e63f961d241d..e38936d3e2622 100644 --- a/server/src/main/java/org/opensearch/cluster/ClusterState.java +++ b/server/src/main/java/org/opensearch/cluster/ClusterState.java @@ -496,38 +496,12 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws } if (metrics.contains(Metric.BLOCKS)) { - builder.startObject("blocks"); - - if (blocks().global().isEmpty() == false) { - builder.startObject("global"); - for (ClusterBlock block : blocks().global()) { - block.toXContent(builder, params); - } - builder.endObject(); - } - - if (blocks().indices().isEmpty() == false) { - builder.startObject("indices"); - for (final Map.Entry> entry : blocks().indices().entrySet()) { - builder.startObject(entry.getKey()); - for (ClusterBlock block : entry.getValue()) { - block.toXContent(builder, params); - } - builder.endObject(); - } - builder.endObject(); - } - - builder.endObject(); + blocks().toXContent(builder, params); } // nodes if (metrics.contains(Metric.NODES)) { - builder.startObject("nodes"); - for (DiscoveryNode node : nodes) { - node.toXContent(builder, params); - } - builder.endObject(); + nodes.toXContent(builder, params); } // meta data diff --git a/server/src/main/java/org/opensearch/cluster/RepositoryCleanupInProgress.java b/server/src/main/java/org/opensearch/cluster/RepositoryCleanupInProgress.java index debfe43f1d2b1..5e39d73583e19 100644 --- a/server/src/main/java/org/opensearch/cluster/RepositoryCleanupInProgress.java +++ b/server/src/main/java/org/opensearch/cluster/RepositoryCleanupInProgress.java @@ -33,12 +33,15 @@ import org.opensearch.LegacyESVersion; import org.opensearch.Version; +import org.opensearch.cluster.metadata.Metadata; import org.opensearch.core.common.Strings; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; import org.opensearch.core.common.io.stream.Writeable; import org.opensearch.core.xcontent.MediaTypeRegistry; import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.core.xcontent.XContentParser; +import org.opensearch.core.xcontent.XContentParserUtils; import org.opensearch.repositories.RepositoryOperation; import java.io.IOException; @@ -101,6 +104,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.startObject(); { builder.field("repository", entry.repository); + if (params.param(Metadata.CONTEXT_MODE_PARAM, Metadata.CONTEXT_MODE_API).equals(Metadata.CONTEXT_MODE_GATEWAY)) { + builder.field("repository_state_id", entry.repositoryStateId); + } // else we don't serialize it } builder.endObject(); } @@ -108,6 +114,36 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws return builder; } + public static RepositoryCleanupInProgress fromXContent(XContentParser parser) throws IOException { + if (parser.currentToken() == null) { + parser.nextToken(); + } + XContentParserUtils.ensureFieldName(parser, parser.currentToken(), TYPE); + parser.nextToken(); + XContentParserUtils.ensureExpectedToken(XContentParser.Token.START_ARRAY, parser.currentToken(), parser); + List entries = new ArrayList<>(); + while (parser.nextToken() != XContentParser.Token.END_ARRAY) { + XContentParserUtils.ensureExpectedToken(XContentParser.Token.START_OBJECT, parser.nextToken(), parser); + String repository = null; + long repositoryStateId = -1L; + while (parser.nextToken() != XContentParser.Token.END_OBJECT) { + XContentParserUtils.ensureExpectedToken(XContentParser.Token.FIELD_NAME, parser.currentToken(), parser); + String currentFieldName = parser.currentName(); + parser.nextToken(); + if ("repository".equals(currentFieldName)) { + repository = parser.text(); + } else if ("repository_state_id".equals(currentFieldName)) { + // only XContent parsed with {@link Metadata.CONTEXT_MODE_GATEWAY} will have the repository state id and can be deserialized + repositoryStateId = parser.longValue(); + } else { + throw new IllegalArgumentException("unknown field [" + currentFieldName + "]"); + } + } + entries.add(new Entry(repository, repositoryStateId)); + } + return new RepositoryCleanupInProgress(entries); + } + @Override public String toString() { return Strings.toString(MediaTypeRegistry.JSON, this); diff --git a/server/src/main/java/org/opensearch/cluster/RestoreInProgress.java b/server/src/main/java/org/opensearch/cluster/RestoreInProgress.java index 769f97373f7b7..a1fcbab3cbb21 100644 --- a/server/src/main/java/org/opensearch/cluster/RestoreInProgress.java +++ b/server/src/main/java/org/opensearch/cluster/RestoreInProgress.java @@ -34,16 +34,22 @@ import org.opensearch.Version; import org.opensearch.cluster.ClusterState.Custom; +import org.opensearch.cluster.metadata.Metadata; import org.opensearch.common.annotation.PublicApi; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; import org.opensearch.core.common.io.stream.Writeable; import org.opensearch.core.index.shard.ShardId; import org.opensearch.core.xcontent.ToXContent; +import org.opensearch.core.xcontent.ToXContentFragment; import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.core.xcontent.XContentParser; +import org.opensearch.core.xcontent.XContentParserUtils; import org.opensearch.snapshots.Snapshot; +import org.opensearch.snapshots.SnapshotId; import java.io.IOException; +import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.Iterator; @@ -52,6 +58,8 @@ import java.util.Objects; import java.util.UUID; +import static org.opensearch.core.xcontent.XContentParserUtils.ensureExpectedToken; + /** * Meta data about restore processes that are currently executing * @@ -144,7 +152,7 @@ public RestoreInProgress build() { * @opensearch.api */ @PublicApi(since = "1.0.0") - public static class Entry { + public static class Entry implements ToXContentFragment { private final String uuid; private final State state; private final Snapshot snapshot; @@ -236,6 +244,135 @@ public boolean equals(Object o) { public int hashCode() { return Objects.hash(uuid, snapshot, state, indices, shards); } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + boolean isGatewayXContent = params.param(Metadata.CONTEXT_MODE_PARAM, Metadata.CONTEXT_MODE_API) + .equals(Metadata.CONTEXT_MODE_GATEWAY); + builder.startObject(); + builder.field("snapshot", snapshot().getSnapshotId().getName()); + builder.field("repository", snapshot().getRepository()); + builder.field("state", state()); + if (isGatewayXContent) { + builder.field("snapshot_uuid", snapshot().getSnapshotId().getUUID()); + builder.field("uuid", uuid()); + } + builder.startArray("indices"); + { + for (String index : indices()) { + builder.value(index); + } + } + builder.endArray(); + builder.startArray("shards"); + { + for (final Map.Entry shardEntry : shards.entrySet()) { + ShardId shardId = shardEntry.getKey(); + ShardRestoreStatus status = shardEntry.getValue(); + builder.startObject(); + { + builder.field("index", shardId.getIndex()); + builder.field("shard", shardId.getId()); + builder.field("state", status.state()); + if (isGatewayXContent) { + builder.field("index_uuid", shardId.getIndex().getUUID()); + if (status.nodeId() != null) builder.field("node_id", status.nodeId()); + if (status.reason() != null) builder.field("reason", status.reason()); + } + } + builder.endObject(); + } + } + + builder.endArray(); + builder.endObject(); + return builder; + } + + public static Entry fromXContent(XContentParser parser) throws IOException { + if (parser.currentToken() == null) { + parser.nextToken(); + } + ensureExpectedToken(XContentParser.Token.START_OBJECT, parser.currentToken(), parser); + String snapshotName = null; + String snapshotRepository = null; + String snapshotUUID = null; + int state = -1; + String uuid = null; + List indices = new ArrayList<>(); + Map shards = new HashMap<>(); + while (parser.nextToken() != XContentParser.Token.END_OBJECT) { + ensureExpectedToken(XContentParser.Token.FIELD_NAME, parser.currentToken(), parser); + String currentFieldName = parser.currentName(); + parser.nextToken(); + switch (currentFieldName) { + case "snapshot": + snapshotName = parser.text(); + break; + case "repository": + snapshotRepository = parser.text(); + break; + case "state": + state = parser.intValue(); + break; + case "snapshot_uuid": + snapshotUUID = parser.text(); + break; + case "uuid": + uuid = parser.text(); + break; + case "indices": + ensureExpectedToken(XContentParser.Token.START_ARRAY, parser.currentToken(), parser); + while (parser.nextToken() != XContentParser.Token.END_ARRAY) { + indices.add(parser.text()); + } + break; + case "shards": + ensureExpectedToken(XContentParser.Token.START_ARRAY, parser.currentToken(), parser); + while (parser.nextToken() != XContentParser.Token.END_ARRAY) { + String indexName = null; + String indexUUID = null; + int shardId = -1; + int restoreState = -1; + String nodeId = null; + String reason = null; + ensureExpectedToken(XContentParser.Token.START_OBJECT, parser.currentToken(), parser); + while (parser.nextToken() != XContentParser.Token.END_OBJECT) { + ensureExpectedToken(XContentParser.Token.FIELD_NAME, parser.currentToken(), parser); + String currentShardFieldName = parser.currentName(); + parser.nextToken(); + switch (currentShardFieldName) { + case "index": + indexName = parser.text(); + break; + case "shard": + shardId = parser.intValue(); + break; + case "state": + restoreState = parser.intValue(); + break; + case "index_uuid": + indexUUID = parser.text(); + break; + case "node_id": + nodeId = parser.text(); + break; + case "reason": + reason = parser.text(); + break; + default: + throw new IllegalArgumentException("unknown field [" + currentFieldName + "]"); + } + } + shards.put(new ShardId(indexName, indexUUID, shardId), new ShardRestoreStatus(nodeId, State.fromValue((byte) restoreState), reason)); + } + break; + default: + throw new IllegalArgumentException("unknown field [" + currentFieldName + "]"); + } + } + return new Entry(uuid, new Snapshot(snapshotRepository, new SnapshotId(snapshotName, snapshotUUID)), State.fromValue((byte) state), indices, shards); + } } /** @@ -495,46 +632,57 @@ public void writeTo(StreamOutput out) throws IOException { public XContentBuilder toXContent(XContentBuilder builder, ToXContent.Params params) throws IOException { builder.startArray("snapshots"); for (final Entry entry : entries.values()) { - toXContent(entry, builder); + toXContent(entry, builder, ToXContent.EMPTY_PARAMS); } builder.endArray(); return builder; } + public static RestoreInProgress fromXContent(XContentParser parser) throws IOException { + if (parser.currentToken() == null) { + parser.nextToken(); + } + final Map entries = new HashMap<>(); + XContentParserUtils.ensureFieldName(parser, parser.currentToken(), "snapshots"); + while (parser.nextToken() != XContentParser.Token.END_ARRAY) { + final Entry entry = Entry.fromXContent(parser); + entries.put(entry.uuid, entry); + } + return new RestoreInProgress(entries); + } + /** * Serializes single restore operation * * @param entry restore operation metadata * @param builder XContent builder + * @param params */ - public void toXContent(Entry entry, XContentBuilder builder) throws IOException { - builder.startObject(); - builder.field("snapshot", entry.snapshot().getSnapshotId().getName()); - builder.field("repository", entry.snapshot().getRepository()); - builder.field("state", entry.state()); - builder.startArray("indices"); - { - for (String index : entry.indices()) { - builder.value(index); - } - } - builder.endArray(); - builder.startArray("shards"); - { - for (final Map.Entry shardEntry : entry.shards.entrySet()) { - ShardId shardId = shardEntry.getKey(); - ShardRestoreStatus status = shardEntry.getValue(); - builder.startObject(); - { - builder.field("index", shardId.getIndex()); - builder.field("shard", shardId.getId()); - builder.field("state", status.state()); - } - builder.endObject(); - } - } - - builder.endArray(); - builder.endObject(); + public void toXContent(Entry entry, XContentBuilder builder, ToXContent.Params params) throws IOException { + entry.toXContent(builder, params); } } + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/server/src/main/java/org/opensearch/cluster/SnapshotDeletionsInProgress.java b/server/src/main/java/org/opensearch/cluster/SnapshotDeletionsInProgress.java index 247eb7a195ca7..25d8db0152dd0 100644 --- a/server/src/main/java/org/opensearch/cluster/SnapshotDeletionsInProgress.java +++ b/server/src/main/java/org/opensearch/cluster/SnapshotDeletionsInProgress.java @@ -41,6 +41,8 @@ import org.opensearch.core.common.io.stream.StreamOutput; import org.opensearch.core.common.io.stream.Writeable; import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.core.xcontent.XContentParser; +import org.opensearch.core.xcontent.XContentParserUtils; import org.opensearch.repositories.RepositoryOperation; import org.opensearch.snapshots.Snapshot; import org.opensearch.snapshots.SnapshotId; @@ -55,6 +57,9 @@ import java.util.Objects; import java.util.Set; +import static org.opensearch.core.xcontent.XContentParserUtils.ensureExpectedToken; +import static org.opensearch.core.xcontent.XContentParserUtils.ensureFieldName; + /** * A class that represents the snapshot deletions that are in progress in the cluster. * @@ -205,6 +210,52 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws return builder; } + public static SnapshotDeletionsInProgress fromXContent(XContentParser parser) throws IOException { + if (parser.currentToken() == null) { + parser.nextToken(); + } + ensureFieldName(parser, parser.currentToken(), TYPE); + parser.nextToken(); + ensureExpectedToken(XContentParser.Token.START_ARRAY, parser.currentToken(), parser); + List entries = new ArrayList<>(); + while ((parser.nextToken()) != XContentParser.Token.END_ARRAY) { + ensureExpectedToken(XContentParser.Token.START_OBJECT, parser.currentToken(), parser); + String repository, repositoryStateId; + List snapshotNames = new ArrayList<>(); + TimeValue startTime; + while ((parser.nextToken()) != XContentParser.Token.END_OBJECT) { + ensureExpectedToken(XContentParser.Token.FIELD_NAME, parser.currentToken(), parser); + final String fieldName = parser.currentName(); + parser.nextToken(); + switch (fieldName) { + case "repository": + repository = parser.text(); + break; + case "snapshots": + ensureExpectedToken(XContentParser.Token.START_ARRAY, parser.currentToken(), parser); + while ((parser.nextToken()) != XContentParser.Token.END_ARRAY) { + snapshotNames.add(parser.text()); + } + break; + case "start_time_millis": + startTime = TimeValue.timeValueMillis(parser.longValue()); + break; + case "start_time": + startTime = TimeValue.parseTimeValue(parser.text(), "start_time"); + break; + case "repository_state_id": + repositoryStateId = parser.text(); + break; + default: + throw new IllegalArgumentException("unknown field [" + fieldName + "]"); + } + } +// entries.add(new Entry()) + } + return SnapshotDeletionsInProgress.of(entries); + } + + @Override public String toString() { StringBuilder builder = new StringBuilder("SnapshotDeletionsInProgress["); diff --git a/server/src/main/java/org/opensearch/cluster/block/ClusterBlock.java b/server/src/main/java/org/opensearch/cluster/block/ClusterBlock.java index 5fa897c0b1185..e131e4facc4b3 100644 --- a/server/src/main/java/org/opensearch/cluster/block/ClusterBlock.java +++ b/server/src/main/java/org/opensearch/cluster/block/ClusterBlock.java @@ -34,17 +34,20 @@ import org.opensearch.common.Nullable; import org.opensearch.common.annotation.PublicApi; +import org.opensearch.common.util.set.Sets; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; import org.opensearch.core.common.io.stream.Writeable; import org.opensearch.core.rest.RestStatus; import org.opensearch.core.xcontent.ToXContentFragment; import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.core.xcontent.XContentParser; import java.io.IOException; import java.util.EnumSet; import java.util.Locale; import java.util.Objects; +import java.util.Set; /** * Blocks the cluster for concurrency @@ -54,6 +57,18 @@ @PublicApi(since = "1.0.0") public class ClusterBlock implements Writeable, ToXContentFragment { + static final String KEY_UUID = "uuid"; + static final String KEY_DESCRIPTION = "description"; + static final String KEY_RETRYABLE = "retryable"; + static final String KEY_DISABLE_STATE_PERSISTENCE = "disable_state_persistence"; + static final String KEY_LEVELS = "levels"; + private static final Set VALID_FIELDS = Sets.newHashSet( + KEY_UUID, + KEY_DESCRIPTION, + KEY_RETRYABLE, + KEY_DISABLE_STATE_PERSISTENCE, + KEY_LEVELS + ); private final int id; @Nullable private final String uuid; @@ -156,14 +171,14 @@ public boolean disableStatePersistence() { public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(Integer.toString(id)); if (uuid != null) { - builder.field("uuid", uuid); + builder.field(KEY_UUID, uuid); } - builder.field("description", description); - builder.field("retryable", retryable); + builder.field(KEY_DESCRIPTION, description); + builder.field(KEY_RETRYABLE, retryable); if (disableStatePersistence) { - builder.field("disable_state_persistence", disableStatePersistence); + builder.field(KEY_DISABLE_STATE_PERSISTENCE, disableStatePersistence); } - builder.startArray("levels"); + builder.startArray(KEY_LEVELS); for (ClusterBlockLevel level : levels) { builder.value(level.name().toLowerCase(Locale.ROOT)); } @@ -172,6 +187,68 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws return builder; } + public static ClusterBlock fromXContent(XContentParser parser, int id) throws IOException { + String uuid = null; + String description = null; + boolean retryable = false; + boolean disableStatePersistence = false; + EnumSet levels = EnumSet.noneOf(ClusterBlockLevel.class); + String currentFieldName = skipBlockID(parser); + XContentParser.Token token; + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + currentFieldName = parser.currentName(); + } else if (token.isValue()) { + switch (Objects.requireNonNull(currentFieldName)) { + case KEY_UUID: + uuid = parser.text(); + break; + case KEY_DESCRIPTION: + description = parser.text(); + break; + case KEY_RETRYABLE: + retryable = parser.booleanValue(); + break; + case KEY_DISABLE_STATE_PERSISTENCE: + disableStatePersistence = parser.booleanValue(); + break; + default: + throw new IllegalArgumentException("unknown field [" + currentFieldName + "]"); + } + } else if (token == XContentParser.Token.START_ARRAY) { + if (currentFieldName.equals(KEY_LEVELS)) { + while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { + levels.add(ClusterBlockLevel.fromString(parser.text(), Locale.ROOT)); + } + } else { + throw new IllegalArgumentException("unknown field [" + currentFieldName + "]"); + } + } else { + throw new IllegalArgumentException("unexpected token [" + token + "]"); + } + } + return new ClusterBlock(id, uuid, description, retryable, disableStatePersistence, false, null, levels); + } + + private static String skipBlockID(XContentParser parser) throws IOException { + if (parser.currentToken() == null) { + parser.nextToken(); + } + if (parser.currentToken() == XContentParser.Token.START_OBJECT) { + parser.nextToken(); + if (parser.currentToken() == XContentParser.Token.FIELD_NAME) { + String currentFieldName = parser.currentName(); + if (VALID_FIELDS.contains(currentFieldName)) { + return currentFieldName; + } else { + // we have hit block id, just move on + parser.nextToken(); + } + } + } + return null; + } + @Override public void writeTo(StreamOutput out) throws IOException { out.writeVInt(id); diff --git a/server/src/main/java/org/opensearch/cluster/block/ClusterBlockLevel.java b/server/src/main/java/org/opensearch/cluster/block/ClusterBlockLevel.java index 5d3bf94aedb19..4940234c21ba6 100644 --- a/server/src/main/java/org/opensearch/cluster/block/ClusterBlockLevel.java +++ b/server/src/main/java/org/opensearch/cluster/block/ClusterBlockLevel.java @@ -35,6 +35,7 @@ import org.opensearch.common.annotation.PublicApi; import java.util.EnumSet; +import java.util.Locale; /** * What level to block the cluster @@ -51,4 +52,11 @@ public enum ClusterBlockLevel { public static final EnumSet ALL = EnumSet.allOf(ClusterBlockLevel.class); public static final EnumSet READ_WRITE = EnumSet.of(READ, WRITE); + + /* + * This method is used to convert a string to a ClusterBlockLevel. + * */ + public static ClusterBlockLevel fromString(String level, Locale locale) { + return ClusterBlockLevel.valueOf(level.toUpperCase(locale)); + } } diff --git a/server/src/main/java/org/opensearch/cluster/block/ClusterBlocks.java b/server/src/main/java/org/opensearch/cluster/block/ClusterBlocks.java index 304136166d515..e188374251d0d 100644 --- a/server/src/main/java/org/opensearch/cluster/block/ClusterBlocks.java +++ b/server/src/main/java/org/opensearch/cluster/block/ClusterBlocks.java @@ -42,6 +42,11 @@ import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; import org.opensearch.core.rest.RestStatus; +import org.opensearch.core.xcontent.ToXContent; +import org.opensearch.core.xcontent.ToXContentFragment; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.core.xcontent.XContentParser; +import org.opensearch.core.xcontent.XContentParserUtils; import org.opensearch.index.IndexModule; import java.io.IOException; @@ -63,7 +68,7 @@ * @opensearch.api */ @PublicApi(since = "1.0.0") -public class ClusterBlocks extends AbstractDiffable { +public class ClusterBlocks extends AbstractDiffable implements ToXContentFragment { public static final ClusterBlocks EMPTY_CLUSTER_BLOCK = new ClusterBlocks(emptySet(), Map.of()); private final Set global; @@ -326,6 +331,16 @@ public static Diff readDiffFrom(StreamInput in) throws IOExceptio return AbstractDiffable.readDiffFrom(ClusterBlocks::readFrom, in); } + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + Builder.toXContext(this, builder, params); + return builder; + } + + public static ClusterBlocks fromXContent(XContentParser parser) throws IOException { + return Builder.fromXContent(parser); + } + /** * An immutable level holder. * @@ -427,10 +442,16 @@ public Builder removeGlobalBlock(int blockId) { } public Builder addIndexBlock(String index, ClusterBlock block) { + prepareIndexForBlocks(index); + indices.get(index).add(block); + return this; + } + + // initialize an index adding further blocks + private Builder prepareIndexForBlocks(String index) { if (!indices.containsKey(index)) { indices.put(index, new HashSet<>()); } - indices.get(index).add(block); return this; } @@ -480,5 +501,82 @@ public ClusterBlocks build() { } return new ClusterBlocks(unmodifiableSet(new HashSet<>(global)), indicesBuilder); } + + public static void toXContext(ClusterBlocks blocks, XContentBuilder builder, ToXContent.Params params) throws IOException { + builder.startObject("blocks"); + if (blocks.global().isEmpty() == false) { + builder.startObject("global"); + for (ClusterBlock block : blocks.global()) { + block.toXContent(builder, params); + } + builder.endObject(); + } + + if (blocks.indices().isEmpty() == false) { + builder.startObject("indices"); + for (Map.Entry> entry : blocks.indices().entrySet()) { + builder.startObject(entry.getKey()); + for (ClusterBlock block : entry.getValue()) { + block.toXContent(builder, params); + } + builder.endObject(); + } + builder.endObject(); + } + builder.endObject(); + } + + public static ClusterBlocks fromXContent(XContentParser parser) throws IOException { + Builder builder = new Builder(); + String currentFieldName = skipBlocksField(parser); + XContentParser.Token token; + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + XContentParserUtils.ensureExpectedToken(XContentParser.Token.FIELD_NAME, token, parser); + currentFieldName = parser.currentName(); + parser.nextToken(); + switch (currentFieldName) { + case "global": + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + currentFieldName = parser.currentName(); + parser.nextToken(); + builder.addGlobalBlock(ClusterBlock.fromXContent(parser, Integer.parseInt(currentFieldName))); + } + break; + case "indices": + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + String indexName = parser.currentName(); + parser.nextToken(); + // prepare for this index as we want to add this to ClusterBlocks even if there is no Block associated with it + builder.prepareIndexForBlocks(indexName); + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + currentFieldName = parser.currentName(); + parser.nextToken(); + builder.addIndexBlock(indexName, ClusterBlock.fromXContent(parser, Integer.parseInt(currentFieldName))); + } + } + break; + default: + throw new IllegalArgumentException("unknown field [" + currentFieldName + "]"); + } + } + return builder.build(); + } + + private static String skipBlocksField(XContentParser parser) throws IOException { + if (parser.currentToken() == null) { + parser.nextToken(); + } + if (parser.currentToken() == XContentParser.Token.START_OBJECT) { + parser.nextToken(); + if (parser.currentToken() == XContentParser.Token.FIELD_NAME) { + if ("blocks".equals(parser.currentName())) { + parser.nextToken(); + } else { + return parser.currentName(); + } + } + } + return null; + } } } diff --git a/server/src/main/java/org/opensearch/cluster/coordination/CoordinationState.java b/server/src/main/java/org/opensearch/cluster/coordination/CoordinationState.java index 987a3e3ffa7d3..d8e48279a0db6 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/CoordinationState.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/CoordinationState.java @@ -104,6 +104,10 @@ public CoordinationState( this.isRemoteStateEnabled = isRemoteStoreClusterStateEnabled(settings); } + public boolean isRemoteStateEnabled() { + return isRemoteStateEnabled; + } + public long getCurrentTerm() { return persistedStateRegistry.getPersistedState(PersistedStateType.LOCAL).getCurrentTerm(); } @@ -449,6 +453,7 @@ public PublishResponse handlePublishRequest(PublishRequest publishRequest) { clusterState.version(), clusterState.term() ); + logger.info("Setting last accepted state : term - {}, version - {}", clusterState.term(), clusterState.version()); persistedStateRegistry.getPersistedState(PersistedStateType.LOCAL).setLastAcceptedState(clusterState); assert getLastAcceptedState() == clusterState; diff --git a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java index 5b2b2b36ea79e..738f0cab9e90f 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java @@ -85,6 +85,7 @@ import org.opensearch.discovery.PeerFinder; import org.opensearch.discovery.SeedHostsProvider; import org.opensearch.discovery.SeedHostsResolver; +import org.opensearch.gateway.remote.RemoteClusterStateService; import org.opensearch.monitor.NodeHealthService; import org.opensearch.monitor.StatusInfo; import org.opensearch.node.remotestore.RemoteStoreNodeService; @@ -208,7 +209,8 @@ public Coordinator( ElectionStrategy electionStrategy, NodeHealthService nodeHealthService, PersistedStateRegistry persistedStateRegistry, - RemoteStoreNodeService remoteStoreNodeService + RemoteStoreNodeService remoteStoreNodeService, + RemoteClusterStateService remoteClusterStateService ) { this.settings = settings; this.transportService = transportService; @@ -260,7 +262,8 @@ public Coordinator( transportService, namedWriteableRegistry, this::handlePublishRequest, - this::handleApplyCommit + this::handleApplyCommit, + remoteClusterStateService ); this.leaderChecker = new LeaderChecker(settings, clusterSettings, transportService, this::onLeaderFailure, nodeHealthService); this.followersChecker = new FollowersChecker( @@ -1321,7 +1324,7 @@ assert getLocalNode().equals(clusterState.getNodes().get(getLocalNode().getId()) + clusterState; final PublicationTransportHandler.PublicationContext publicationContext = publicationHandler.newPublicationContext( - clusterChangedEvent + clusterChangedEvent, coordinationState.get().isRemoteStateEnabled() ); final PublishRequest publishRequest = coordinationState.get().handleClientValue(clusterState); diff --git a/server/src/main/java/org/opensearch/cluster/coordination/PublicationTransportHandler.java b/server/src/main/java/org/opensearch/cluster/coordination/PublicationTransportHandler.java index 1fdaeead0d28d..1f1f795139651 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/PublicationTransportHandler.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/PublicationTransportHandler.java @@ -31,6 +31,7 @@ package org.opensearch.cluster.coordination; +import java.util.Optional; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.message.ParameterizedMessage; @@ -47,6 +48,8 @@ import org.opensearch.core.common.io.stream.NamedWriteableRegistry; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.transport.TransportResponse; +import org.opensearch.gateway.remote.ClusterMetadataManifest; +import org.opensearch.gateway.remote.RemoteClusterStateService; import org.opensearch.threadpool.ThreadPool; import org.opensearch.transport.BytesTransportRequest; import org.opensearch.transport.TransportChannel; @@ -74,6 +77,7 @@ public class PublicationTransportHandler { private static final Logger logger = LogManager.getLogger(PublicationTransportHandler.class); public static final String PUBLISH_STATE_ACTION_NAME = "internal:cluster/coordination/publish_state"; + public static final String PUBLISH_REMOTE_STATE_ACTION_NAME = "internal:cluster/coordination/publish_remote_state"; public static final String COMMIT_STATE_ACTION_NAME = "internal:cluster/coordination/commit_state"; private final TransportService transportService; @@ -97,16 +101,19 @@ public class PublicationTransportHandler { private final TransportRequestOptions stateRequestOptions = TransportRequestOptions.builder() .withType(TransportRequestOptions.Type.STATE) .build(); + private final RemoteClusterStateService remoteClusterStateService; public PublicationTransportHandler( TransportService transportService, NamedWriteableRegistry namedWriteableRegistry, Function handlePublishRequest, - BiConsumer> handleApplyCommit + BiConsumer> handleApplyCommit, + RemoteClusterStateService remoteClusterStateService ) { this.transportService = transportService; this.namedWriteableRegistry = namedWriteableRegistry; this.handlePublishRequest = handlePublishRequest; + this.remoteClusterStateService = remoteClusterStateService; transportService.registerRequestHandler( PUBLISH_STATE_ACTION_NAME, @@ -117,6 +124,15 @@ public PublicationTransportHandler( (request, channel, task) -> channel.sendResponse(handleIncomingPublishRequest(request)) ); + transportService.registerRequestHandler( + PUBLISH_REMOTE_STATE_ACTION_NAME, + ThreadPool.Names.GENERIC, + false, + false, + RemotePublishRequest::new, + (request, channel, task) -> channel.sendResponse(handleIncomingRemotePublishRequest(request)) + ); + transportService.registerRequestHandler( COMMIT_STATE_ACTION_NAME, ThreadPool.Names.GENERIC, @@ -211,6 +227,41 @@ private PublishWithJoinResponse handleIncomingPublishRequest(BytesTransportReque } } + private PublishWithJoinResponse handleIncomingRemotePublishRequest(RemotePublishRequest request) throws IOException { + final Optional manifestOptional = remoteClusterStateService.getClusterMetadataManifestByTermVersion(request.getClusterName(), request.getClusterUUID(), request.term, request.version); + if (manifestOptional.isPresent() == false) { + // todo change exception + throw new IncompatibleClusterStateVersionException("No remote state for term version"); + } + ClusterMetadataManifest manifest = manifestOptional.get(); + boolean applyFullState = false; + final ClusterState lastSeen = lastSeenClusterState.get(); + if (lastSeen == null) { + logger.debug("Diff cannot be applied as there is not last cluster state"); + applyFullState = true; + } else if (manifest.getDiffManifest() == null) { + logger.debug("There is no diff in the manifest"); + applyFullState = true; + } else if (manifest.getDiffManifest().getFromStateUUID().equals(lastSeen.stateUUID()) == false) { + logger.debug("Last cluster state not compatible with the diff"); + applyFullState = true; + } + + if (applyFullState == true) { + ClusterState clusterState = remoteClusterStateService.getClusterStateForManifest(request.getClusterName(), manifest, transportService.getLocalNode().getId()); + logger.debug("Downloaded full cluster state [{}]", clusterState); + final PublishWithJoinResponse response = acceptState(clusterState); + lastSeenClusterState.set(clusterState); + return response; + } else { + ClusterState clusterState = remoteClusterStateService.getClusterStateUsingDiff(request.getClusterName(), manifest, lastSeenClusterState.get(), transportService.getLocalNode().getId()); + logger.debug("Downloaded full cluster state from diff [{}]", clusterState); + final PublishWithJoinResponse response = acceptState(clusterState); + lastSeenClusterState.compareAndSet(lastSeen, clusterState); + return response; + } + } + private PublishWithJoinResponse acceptState(ClusterState incomingState) { // if the state is coming from the current node, use original request instead (see currentPublishRequestToSelf for explanation) if (transportService.getLocalNode().equals(incomingState.nodes().getClusterManagerNode())) { @@ -224,8 +275,8 @@ private PublishWithJoinResponse acceptState(ClusterState incomingState) { return handlePublishRequest.apply(new PublishRequest(incomingState)); } - public PublicationContext newPublicationContext(ClusterChangedEvent clusterChangedEvent) { - final PublicationContext publicationContext = new PublicationContext(clusterChangedEvent); + public PublicationContext newPublicationContext(ClusterChangedEvent clusterChangedEvent, boolean isRemoteStateEnabled) { + final PublicationContext publicationContext = new PublicationContext(clusterChangedEvent, isRemoteStateEnabled); // Build the serializations we expect to need now, early in the process, so that an error during serialization fails the publication // straight away. This isn't watertight since we send diffs on a best-effort basis and may fall back to sending a full state (and @@ -270,12 +321,14 @@ public class PublicationContext { private final boolean sendFullVersion; private final Map serializedStates = new HashMap<>(); private final Map serializedDiffs = new HashMap<>(); + private final boolean sendRemoteState; - PublicationContext(ClusterChangedEvent clusterChangedEvent) { + PublicationContext(ClusterChangedEvent clusterChangedEvent, boolean isRemoteStateEnabled) { discoveryNodes = clusterChangedEvent.state().nodes(); newState = clusterChangedEvent.state(); previousState = clusterChangedEvent.previousState(); sendFullVersion = previousState.getBlocks().disableStatePersistence(); + sendRemoteState = isRemoteStateEnabled; } void buildDiffAndSerializeStates() { @@ -339,7 +392,9 @@ public void onFailure(Exception e) { } else { responseActionListener = listener; } - if (sendFullVersion || previousState.nodes().nodeExists(destination) == false) { + if (sendRemoteState && destination.isRemoteStoreNode()) { + sendRemoteClusterState(destination, publishRequest.getAcceptedState(), responseActionListener); + } else if (sendFullVersion || previousState.nodes().nodeExists(destination) == false) { logger.trace("sending full cluster state version [{}] to [{}]", newState.version(), destination); sendFullClusterState(destination, responseActionListener); } else { @@ -384,6 +439,43 @@ public String executor() { ); } + private void sendRemoteClusterState(DiscoveryNode destination, ClusterState clusterState, ActionListener listener) { + try { + final RemotePublishRequest remotePublishRequest = new RemotePublishRequest(discoveryNodes.getLocalNode(), clusterState.term(), clusterState.getVersion(), clusterState.getClusterName().value(), clusterState.metadata().clusterUUID()); + final Consumer transportExceptionHandler = exp -> { + logger.debug(() -> new ParameterizedMessage("failed to send remote cluster state to {}", destination), exp); + listener.onFailure(exp); + }; + final TransportResponseHandler responseHandler = new TransportResponseHandler< + PublishWithJoinResponse>() { + + @Override + public PublishWithJoinResponse read(StreamInput in) throws IOException { + return new PublishWithJoinResponse(in); + } + + @Override + public void handleResponse(PublishWithJoinResponse response) { + listener.onResponse(response); + } + + @Override + public void handleException(TransportException exp) { + transportExceptionHandler.accept(exp); + } + + @Override + public String executor() { + return ThreadPool.Names.GENERIC; + } + }; + transportService.sendRequest(destination, PUBLISH_REMOTE_STATE_ACTION_NAME, remotePublishRequest, stateRequestOptions, responseHandler); + } catch (Exception e) { + logger.warn(() -> new ParameterizedMessage("error sending cluster state to {}", destination), e); + listener.onFailure(e); + } + } + private void sendFullClusterState(DiscoveryNode destination, ActionListener listener) { BytesReference bytes = serializedStates.get(destination.getVersion()); if (bytes == null) { diff --git a/server/src/main/java/org/opensearch/cluster/coordination/RemotePublishRequest.java b/server/src/main/java/org/opensearch/cluster/coordination/RemotePublishRequest.java new file mode 100644 index 0000000000000..b1981c0b4b1a0 --- /dev/null +++ b/server/src/main/java/org/opensearch/cluster/coordination/RemotePublishRequest.java @@ -0,0 +1,48 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.coordination; + +import java.io.IOException; +import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.core.common.io.stream.StreamOutput; + +public class RemotePublishRequest extends TermVersionRequest { + + // todo Do we need cluster name and UUID ? + private final String clusterName; + private final String clusterUUID; + + public RemotePublishRequest(DiscoveryNode sourceNode, long term, long version, String clusterName, String clusterUUID) { + super(sourceNode, term, version); + this.clusterName = clusterName; + this.clusterUUID = clusterUUID; + } + + public RemotePublishRequest(StreamInput in) throws IOException { + super(in); + this.clusterName = in.readString(); + this.clusterUUID = in.readString(); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + out.writeString(clusterName); + out.writeString(clusterUUID); + } + + public String getClusterName() { + return clusterName; + } + + public String getClusterUUID() { + return clusterUUID; + } +} diff --git a/server/src/main/java/org/opensearch/cluster/metadata/IndexMetadata.java b/server/src/main/java/org/opensearch/cluster/metadata/IndexMetadata.java index 5a08cc717bff6..eacc83c2f7396 100644 --- a/server/src/main/java/org/opensearch/cluster/metadata/IndexMetadata.java +++ b/server/src/main/java/org/opensearch/cluster/metadata/IndexMetadata.java @@ -754,6 +754,10 @@ public String getIndexUUID() { return index.getUUID(); } + public String getIndexName() { + return index.getName(); + } + /** * Test whether the current index UUID is the same as the given one. Returns true if either are _na_ */ diff --git a/server/src/main/java/org/opensearch/cluster/metadata/Metadata.java b/server/src/main/java/org/opensearch/cluster/metadata/Metadata.java index e38043c11e977..8e5fdeb090e03 100644 --- a/server/src/main/java/org/opensearch/cluster/metadata/Metadata.java +++ b/server/src/main/java/org/opensearch/cluster/metadata/Metadata.java @@ -176,6 +176,16 @@ public enum XContentContext { public interface Custom extends NamedDiffable, ToXContentFragment, ClusterState.FeatureAware { EnumSet context(); + + static Custom fromXContent(XContentParser parser, String name) throws IOException { + // handling any Exception is caller's responsibility + return parser.namedObject(Custom.class, name, null); + } + + static Custom fromXContent(XContentParser parser) throws IOException { + String currentFieldName = parser.currentName(); + return fromXContent(parser, currentFieldName); + } } public static final Setting DEFAULT_REPLICA_COUNT_SETTING = Setting.intSetting( @@ -261,7 +271,7 @@ public interface Custom extends NamedDiffable, ToXContentFragment, Clust private final Settings settings; private final DiffableStringMap hashesOfConsistentSettings; private final Map indices; - private final Map templates; + private final TemplatesMetadata templates; private final Map customs; private final transient int totalNumberOfShards; // Transient ? not serializable anyway? @@ -305,7 +315,7 @@ public interface Custom extends NamedDiffable, ToXContentFragment, Clust this.hashesOfConsistentSettings = hashesOfConsistentSettings; this.indices = Collections.unmodifiableMap(indices); this.customs = Collections.unmodifiableMap(customs); - this.templates = Collections.unmodifiableMap(templates); + this.templates = new TemplatesMetadata(templates); int totalNumberOfShards = 0; int totalOpenIndexShards = 0; for (IndexMetadata cursor : indices.values()) { @@ -807,13 +817,17 @@ public Map getIndices() { } public Map templates() { - return this.templates; + return this.templates.getTemplates(); } public Map getTemplates() { return templates(); } + public TemplatesMetadata templatesMetadata() { + return this.templates; + } + public Map componentTemplates() { return Optional.ofNullable((ComponentTemplateMetadata) this.custom(ComponentTemplateMetadata.TYPE)) .map(ComponentTemplateMetadata::componentTemplates) @@ -924,7 +938,7 @@ public Iterator iterator() { } public static boolean isGlobalStateEquals(Metadata metadata1, Metadata metadata2) { - if (!metadata1.coordinationMetadata.equals(metadata2.coordinationMetadata)) { + if (!isCoordinationMetadataEqual(metadata1, metadata2)) { return false; } if (!metadata1.hashesOfConsistentSettings.equals(metadata2.hashesOfConsistentSettings)) { @@ -943,13 +957,29 @@ public static boolean isGlobalStateEquals(Metadata metadata1, Metadata metadata2 * Compares Metadata entities persisted in Remote Store. */ public static boolean isGlobalResourcesMetadataEquals(Metadata metadata1, Metadata metadata2) { - if (!metadata1.persistentSettings.equals(metadata2.persistentSettings)) { + if (!isSettingsMetadataEqual(metadata1, metadata2)) { return false; } - if (!metadata1.templates.equals(metadata2.templates())) { + if (!isTemplatesMetadataEqual(metadata1, metadata2)) { return false; } // Check if any persistent metadata needs to be saved + return isCustomMetadataEqual(metadata1, metadata2); + } + + public static boolean isCoordinationMetadataEqual(Metadata metadata1, Metadata metadata2) { + return metadata1.coordinationMetadata.equals(metadata2.coordinationMetadata); + } + + public static boolean isSettingsMetadataEqual(Metadata metadata1, Metadata metadata2) { + return metadata1.persistentSettings.equals(metadata2.persistentSettings); + } + + public static boolean isTemplatesMetadataEqual(Metadata metadata1, Metadata metadata2) { + return metadata1.templates.equals(metadata2.templates); + } + + public static boolean isCustomMetadataEqual(Metadata metadata1, Metadata metadata2) { int customCount1 = 0; for (Map.Entry cursor : metadata1.customs.entrySet()) { if (cursor.getValue().context().contains(XContentContext.GATEWAY)) { @@ -963,8 +993,7 @@ public static boolean isGlobalResourcesMetadataEquals(Metadata metadata1, Metada customCount2++; } } - if (customCount1 != customCount2) return false; - return true; + return customCount1 == customCount2; } @Override @@ -1013,7 +1042,11 @@ private static class MetadataDiff implements Diff { persistentSettings = after.persistentSettings; hashesOfConsistentSettings = after.hashesOfConsistentSettings.diff(before.hashesOfConsistentSettings); indices = DiffableUtils.diff(before.indices, after.indices, DiffableUtils.getStringKeySerializer()); - templates = DiffableUtils.diff(before.templates, after.templates, DiffableUtils.getStringKeySerializer()); + templates = DiffableUtils.diff( + before.templates.getTemplates(), + after.templates.getTemplates(), + DiffableUtils.getStringKeySerializer() + ); customs = DiffableUtils.diff(before.customs, after.customs, DiffableUtils.getStringKeySerializer(), CUSTOM_VALUE_SERIALIZER); } @@ -1076,7 +1109,7 @@ public Metadata apply(Metadata part) { builder.persistentSettings(persistentSettings); builder.hashesOfConsistentSettings(hashesOfConsistentSettings.apply(part.hashesOfConsistentSettings)); builder.indices(indices.apply(part.indices)); - builder.templates(templates.apply(part.templates)); + builder.templates(templates.apply(part.templates.getTemplates())); builder.customs(customs.apply(part.customs)); return builder.build(); } @@ -1132,10 +1165,7 @@ public void writeTo(StreamOutput out) throws IOException { for (IndexMetadata indexMetadata : this) { indexMetadata.writeTo(out); } - out.writeVInt(templates.size()); - for (final IndexTemplateMetadata cursor : templates.values()) { - cursor.writeTo(out); - } + templates.writeTo(out); // filter out custom states not supported by the other node int numberOfCustoms = 0; for (final Custom cursor : customs.values()) { @@ -1199,7 +1229,7 @@ public Builder(Metadata metadata) { this.hashesOfConsistentSettings = metadata.hashesOfConsistentSettings; this.version = metadata.version; this.indices = new HashMap<>(metadata.indices); - this.templates = new HashMap<>(metadata.templates); + this.templates = new HashMap<>(metadata.templates.getTemplates()); this.customs = new HashMap<>(metadata.customs); this.previousMetadata = metadata; } @@ -1278,6 +1308,11 @@ public Builder templates(Map templates) { return this; } + public Builder templates(TemplatesMetadata templatesMetadata) { + this.templates.putAll(templatesMetadata.getTemplates()); + return this; + } + public Builder put(String name, ComponentTemplate componentTemplate) { Objects.requireNonNull(componentTemplate, "it is invalid to add a null component template: " + name); Map existingTemplates = Optional.ofNullable( @@ -1768,9 +1803,7 @@ public static void toXContent(Metadata metadata, XContentBuilder builder, ToXCon } builder.startObject("templates"); - for (final IndexTemplateMetadata cursor : metadata.templates().values()) { - IndexTemplateMetadata.Builder.toXContentWithTypes(cursor, builder, params); - } + metadata.templatesMetadata().toXContent(builder, params); builder.endObject(); if (context == XContentContext.API) { @@ -1833,12 +1866,10 @@ public static Metadata fromXContent(XContentParser parser) throws IOException { } else if ("hashes_of_consistent_settings".equals(currentFieldName)) { builder.hashesOfConsistentSettings(parser.mapStrings()); } else if ("templates".equals(currentFieldName)) { - while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { - builder.put(IndexTemplateMetadata.Builder.fromXContent(parser, parser.currentName())); - } + builder.templates(TemplatesMetadata.fromXContent(parser)); } else { try { - Custom custom = parser.namedObject(Custom.class, currentFieldName, null); + Custom custom = Custom.fromXContent(parser, currentFieldName); builder.putCustom(custom.getWriteableName(), custom); } catch (NamedObjectNotFoundException ex) { logger.warn("Skipping unknown custom object with type {}", currentFieldName); diff --git a/server/src/main/java/org/opensearch/cluster/metadata/RepositoriesMetadata.java b/server/src/main/java/org/opensearch/cluster/metadata/RepositoriesMetadata.java index e3689d046193c..9b52bdd1b16c5 100644 --- a/server/src/main/java/org/opensearch/cluster/metadata/RepositoriesMetadata.java +++ b/server/src/main/java/org/opensearch/cluster/metadata/RepositoriesMetadata.java @@ -202,6 +202,10 @@ public static RepositoriesMetadata fromXContent(XContentParser parser) throws IO XContentParser.Token token; List repository = new ArrayList<>(); while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.START_OBJECT) { + // move to next token if parsing the whole object + token = parser.nextToken(); + } if (token == XContentParser.Token.FIELD_NAME) { String name = parser.currentName(); if (parser.nextToken() != XContentParser.Token.START_OBJECT) { diff --git a/server/src/main/java/org/opensearch/cluster/metadata/TemplatesMetadata.java b/server/src/main/java/org/opensearch/cluster/metadata/TemplatesMetadata.java new file mode 100644 index 0000000000000..6ecc471c5e0ae --- /dev/null +++ b/server/src/main/java/org/opensearch/cluster/metadata/TemplatesMetadata.java @@ -0,0 +1,150 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.metadata; + +import org.opensearch.cluster.AbstractDiffable; +import org.opensearch.common.annotation.PublicApi; +import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.core.xcontent.ToXContentFragment; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.core.xcontent.XContentParser; + +import java.io.IOException; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; + +/** + * Metadata for legacy templates + * + * @opensearch.api + */ +@PublicApi(since = "2.15.0") +public class TemplatesMetadata extends AbstractDiffable implements ToXContentFragment { + public static TemplatesMetadata EMPTY_METADATA = builder().build(); + private final Map templates; + + public TemplatesMetadata() { + this(Collections.emptyMap()); + } + + public TemplatesMetadata(Map templates) { + this.templates = Collections.unmodifiableMap(templates); + } + + public static Builder builder() { + return new Builder(); + } + + public Map getTemplates() { + return this.templates; + } + + public static TemplatesMetadata fromXContent(XContentParser parser) throws IOException { + return Builder.fromXContent(parser); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + Builder.toXContent(this, builder, params); + return builder; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeVInt(templates.size()); + for (final IndexTemplateMetadata cursor : templates.values()) { + cursor.writeTo(out); + } + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + TemplatesMetadata that = (TemplatesMetadata) o; + + return Objects.equals(templates, that.templates); + } + + @Override + public int hashCode() { + return templates != null ? templates.hashCode() : 0; + } + + /** + * Builder for the templates metadata + * + * @opensearch.api + */ + @PublicApi(since = "2.15.0") + public static class Builder { + private final Map templates; + + public Builder() { + this.templates = new HashMap(); + } + + public Builder(Map templates) { + this.templates = templates; + } + + public Builder put(IndexTemplateMetadata.Builder templateBuilder) { + return put(templateBuilder.build()); + } + + public Builder put(IndexTemplateMetadata template) { + templates.put(template.name(), template); + return this; + } + + public Builder removeTemplate(String templateName) { + templates.remove(templateName); + return this; + } + + public Builder templates(Map templates) { + this.templates.putAll(templates); + return this; + } + + public TemplatesMetadata build() { + return new TemplatesMetadata(templates); + } + + public static void toXContent(TemplatesMetadata templatesMetadata, XContentBuilder builder, Params params) throws IOException { + for (IndexTemplateMetadata cursor : templatesMetadata.getTemplates().values()) { + IndexTemplateMetadata.Builder.toXContentWithTypes(cursor, builder, params); + } + } + + public static TemplatesMetadata fromXContent(XContentParser parser) throws IOException { + Builder builder = new Builder(); + + XContentParser.Token token = parser.currentToken(); + String currentFieldName = parser.currentName(); + if (currentFieldName == null) { + token = parser.nextToken(); + if (token == XContentParser.Token.START_OBJECT) { + // move to the field name + token = parser.nextToken(); + } + currentFieldName = parser.currentName(); + } + if (currentFieldName != null) { + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + builder.put(IndexTemplateMetadata.Builder.fromXContent(parser, parser.currentName())); + } + } + return builder.build(); + } + } +} diff --git a/server/src/main/java/org/opensearch/cluster/node/DiscoveryNode.java b/server/src/main/java/org/opensearch/cluster/node/DiscoveryNode.java index 93d6f6b0e4d22..91a92a8cb7b58 100644 --- a/server/src/main/java/org/opensearch/cluster/node/DiscoveryNode.java +++ b/server/src/main/java/org/opensearch/cluster/node/DiscoveryNode.java @@ -34,6 +34,7 @@ import org.opensearch.LegacyESVersion; import org.opensearch.Version; +import org.opensearch.cluster.metadata.Metadata; import org.opensearch.common.UUIDs; import org.opensearch.common.annotation.PublicApi; import org.opensearch.common.settings.Setting; @@ -44,6 +45,7 @@ import org.opensearch.core.common.transport.TransportAddress; import org.opensearch.core.xcontent.ToXContentFragment; import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.core.xcontent.XContentParser; import org.opensearch.node.Node; import java.io.IOException; @@ -62,6 +64,8 @@ import java.util.stream.Collectors; import java.util.stream.Stream; +import static org.opensearch.cluster.metadata.Metadata.CONTEXT_MODE_API; +import static org.opensearch.cluster.metadata.Metadata.CONTEXT_MODE_PARAM; import static org.opensearch.node.NodeRoleSettings.NODE_ROLES_SETTING; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_NODE_ATTRIBUTE_KEY_PREFIX; @@ -74,6 +78,14 @@ public class DiscoveryNode implements Writeable, ToXContentFragment { static final String COORDINATING_ONLY = "coordinating_only"; + static final String KEY_NAME = "name"; + static final String KEY_EPHEMERAL_ID = "ephemeral_id"; + static final String KEY_HOST_NAME = "host_name"; + static final String KEY_HOST_ADDRESS = "host_address"; + static final String KEY_TRANSPORT_ADDRESS = "transport_address"; + static final String KEY_ATTRIBUTES = "attributes"; + static final String KEY_VERSION = "version"; + static final String KEY_ROLES = "roles"; public static boolean nodeRequiresLocalStorage(Settings settings) { boolean localStorageEnable = Node.NODE_LOCAL_STORAGE_SETTING.get(settings); @@ -594,21 +606,97 @@ public String toString() { @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + Metadata.XContentContext context = Metadata.XContentContext.valueOf(params.param(CONTEXT_MODE_PARAM, CONTEXT_MODE_API)); builder.startObject(getId()); - builder.field("name", getName()); - builder.field("ephemeral_id", getEphemeralId()); - builder.field("transport_address", getAddress().toString()); + builder.field(KEY_NAME, getName()); + builder.field(KEY_EPHEMERAL_ID, getEphemeralId()); + builder.field(KEY_TRANSPORT_ADDRESS, getAddress().toString()); - builder.startObject("attributes"); + builder.startObject(KEY_ATTRIBUTES); for (Map.Entry entry : attributes.entrySet()) { builder.field(entry.getKey(), entry.getValue()); } builder.endObject(); + if (context == Metadata.XContentContext.GATEWAY) { + builder.field(KEY_HOST_NAME, getHostName()); + builder.field(KEY_HOST_ADDRESS, getHostAddress()); + builder.field(KEY_VERSION, getVersion().toString()); + builder.startArray(KEY_ROLES); + for (DiscoveryNodeRole role : roles) { + builder.value(role.roleName()); + } + builder.endArray(); + } builder.endObject(); return builder; } + public static DiscoveryNode fromXContent(XContentParser parser, String nodeId) throws IOException { + if (parser.currentToken() == null) { + parser.nextToken(); + } + if (parser.currentToken() == XContentParser.Token.START_OBJECT) { + parser.nextToken(); + } + if (parser.currentToken() != XContentParser.Token.FIELD_NAME) { + throw new IllegalArgumentException("expected field name but got a " + parser.currentToken()); + } + String nodeName = null; + String hostName = null; + String hostAddress = null; + String ephemeralId = null; + TransportAddress transportAddress = null; + Map attributes = new HashMap<>(); + Set roles = new HashSet<>(); + Version version = null; + String currentFieldName = parser.currentName(); + // token should be start object at this point + // XContentParser.Token token = parser.nextToken(); + // if (token != XContentParser.Token.START_OBJECT) { + // throw new IllegalArgumentException("expected object but got a " + token); + // } + XContentParser.Token token; + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + currentFieldName = parser.currentName(); + } else if (token.isValue()) { + if (KEY_NAME.equals(currentFieldName)) { + nodeName = parser.text(); + } else if (KEY_EPHEMERAL_ID.equals(currentFieldName)) { + ephemeralId = parser.text(); + } else if (KEY_TRANSPORT_ADDRESS.equals(currentFieldName)) { + transportAddress = TransportAddress.fromString(parser.text()); + } else if (KEY_HOST_NAME.equals(currentFieldName)) { + hostName = parser.text(); + } else if (KEY_HOST_ADDRESS.equals(currentFieldName)) { + hostAddress = parser.text(); + } else if (KEY_VERSION.equals(currentFieldName)) { + version = Version.fromString(parser.text()); + } + } else if (token == XContentParser.Token.START_OBJECT) { + if (KEY_ATTRIBUTES.equals(currentFieldName)) { + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + currentFieldName = parser.currentName(); + } else if (token.isValue()) { + attributes.put(currentFieldName, parser.text()); + } + } + } + } else if (token == XContentParser.Token.START_ARRAY) { + if (KEY_ROLES.equals(currentFieldName)) { + while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { + roles.add(getRoleFromRoleName(parser.text())); + } + } + } else { + throw new IllegalArgumentException("unexpected token " + token); + } + } + return new DiscoveryNode(nodeName, nodeId, ephemeralId, hostName, hostAddress, transportAddress, attributes, roles, version); + } + private static Map rolesToMap(final Stream roles) { return Collections.unmodifiableMap(roles.collect(Collectors.toMap(DiscoveryNodeRole::roleName, Function.identity()))); } diff --git a/server/src/main/java/org/opensearch/cluster/node/DiscoveryNodes.java b/server/src/main/java/org/opensearch/cluster/node/DiscoveryNodes.java index 203d45c3318c3..c5ecc2a188079 100644 --- a/server/src/main/java/org/opensearch/cluster/node/DiscoveryNodes.java +++ b/server/src/main/java/org/opensearch/cluster/node/DiscoveryNodes.java @@ -32,10 +32,10 @@ package org.opensearch.cluster.node; -import org.opensearch.LegacyESVersion; import org.opensearch.Version; import org.opensearch.cluster.AbstractDiffable; import org.opensearch.cluster.Diff; +import org.opensearch.cluster.metadata.Metadata; import org.opensearch.common.Booleans; import org.opensearch.common.Nullable; import org.opensearch.common.annotation.PublicApi; @@ -45,6 +45,10 @@ import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; import org.opensearch.core.common.transport.TransportAddress; +import org.opensearch.core.xcontent.ToXContentFragment; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.core.xcontent.XContentParser; +import org.opensearch.core.xcontent.XContentParserUtils; import java.io.IOException; import java.util.ArrayList; @@ -60,6 +64,9 @@ import java.util.stream.Stream; import java.util.stream.StreamSupport; +import static org.opensearch.cluster.metadata.Metadata.CONTEXT_MODE_API; +import static org.opensearch.cluster.metadata.Metadata.CONTEXT_MODE_PARAM; + /** * This class holds all {@link DiscoveryNode} in the cluster and provides convenience methods to * access, modify merge / diff discovery nodes. @@ -67,7 +74,7 @@ * @opensearch.api */ @PublicApi(since = "1.0.0") -public class DiscoveryNodes extends AbstractDiffable implements Iterable { +public class DiscoveryNodes extends AbstractDiffable implements Iterable, ToXContentFragment { public static final DiscoveryNodes EMPTY_NODES = builder().build(); @@ -282,18 +289,6 @@ public boolean nodeExistsWithSameRoles(DiscoveryNode discoveryNode) { return existing != null && existing.equals(discoveryNode) && existing.getRoles().equals(discoveryNode.getRoles()); } - /** - * Determine if the given node exists and has the right version. During upgrade from Elasticsearch version as OpenSearch node run in - * BWC mode and can have the version as 7.10.2 in cluster state from older cluster-manager to OpenSearch cluster-manager. - */ - public boolean nodeExistsWithBWCVersion(DiscoveryNode discoveryNode) { - final DiscoveryNode existing = nodes.get(discoveryNode.getId()); - return existing != null - && existing.equals(discoveryNode) - && existing.getVersion().equals(LegacyESVersion.V_7_10_2) - && discoveryNode.getVersion().onOrAfter(Version.V_1_0_0); - } - /** * Get the id of the cluster-manager node * @@ -579,6 +574,64 @@ public String toString() { return sb.toString(); } + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject("nodes"); + for (DiscoveryNode node : this) { + node.toXContent(builder, params); + } + builder.endObject(); + Metadata.XContentContext context = Metadata.XContentContext.valueOf(params.param(CONTEXT_MODE_PARAM, CONTEXT_MODE_API)); + if (context == Metadata.XContentContext.GATEWAY && clusterManagerNodeId != null) { + builder.field("cluster_manager", clusterManagerNodeId); + } + return builder; + } + + public static DiscoveryNodes fromXContent(XContentParser parser) throws IOException { + Builder builder = new Builder(); + if (parser.currentToken() == null) { + parser.nextToken(); + } + if (parser.currentToken() == XContentParser.Token.START_OBJECT) { + parser.nextToken(); + } + XContentParserUtils.ensureExpectedToken(XContentParser.Token.FIELD_NAME, parser.currentToken(), parser); + XContentParser.Token token; + String currentFieldName = parser.currentName(); + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + currentFieldName = parser.currentName(); + } else if (token == XContentParser.Token.START_OBJECT) { + if ("nodes".equals(currentFieldName)) { + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + currentFieldName = parser.currentName(); + } else if (token == XContentParser.Token.START_OBJECT) { + String nodeId = currentFieldName; + DiscoveryNode node = DiscoveryNode.fromXContent(parser, nodeId); + builder.add(node); + } + } + } else { + throw new IllegalArgumentException("unexpected object field " + currentFieldName); + } + } else if (token.isValue()) { + if ("cluster_manager".equals(currentFieldName)) { + String clusterManagerNodeId = parser.text(); + if (clusterManagerNodeId != null) { + builder.clusterManagerNodeId(clusterManagerNodeId); + } + } else { + throw new IllegalArgumentException("unexpected value field " + currentFieldName); + } + } else { + throw new IllegalArgumentException("unexpected token " + token); + } + } + return builder.build(); + } + /** * Delta between nodes. * diff --git a/server/src/main/java/org/opensearch/cluster/routing/RoutingTable.java b/server/src/main/java/org/opensearch/cluster/routing/RoutingTable.java index e4095a84be081..6c7b94f316da2 100644 --- a/server/src/main/java/org/opensearch/cluster/routing/RoutingTable.java +++ b/server/src/main/java/org/opensearch/cluster/routing/RoutingTable.java @@ -79,7 +79,7 @@ public class RoutingTable implements Iterable, Diffable indicesRouting; - private RoutingTable(long version, final Map indicesRouting) { + public RoutingTable(long version, final Map indicesRouting) { this.version = version; this.indicesRouting = Collections.unmodifiableMap(indicesRouting); } diff --git a/server/src/main/java/org/opensearch/cluster/routing/remote/RemoteRoutingTableService.java b/server/src/main/java/org/opensearch/cluster/routing/remote/RemoteRoutingTableService.java new file mode 100644 index 0000000000000..266709894c1b7 --- /dev/null +++ b/server/src/main/java/org/opensearch/cluster/routing/remote/RemoteRoutingTableService.java @@ -0,0 +1,305 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.routing.remote; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.store.IndexInput; +import org.opensearch.action.LatchedActionListener; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.routing.IndexRoutingTable; +import org.opensearch.common.CheckedRunnable; +import org.opensearch.common.blobstore.AsyncMultiStreamBlobContainer; +import org.opensearch.common.blobstore.BlobContainer; +import org.opensearch.common.blobstore.BlobPath; +import org.opensearch.common.blobstore.stream.write.WritePriority; +import org.opensearch.common.blobstore.transfer.RemoteTransferContainer; +import org.opensearch.common.blobstore.transfer.stream.OffsetRangeIndexInputStream; + +import org.opensearch.common.lucene.store.ByteArrayIndexInput; +import org.opensearch.common.settings.Setting; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.io.IOUtils; +import org.opensearch.core.action.ActionListener; +import org.opensearch.core.index.Index; +import org.opensearch.core.xcontent.ToXContent; +import org.opensearch.gateway.remote.ClusterMetadataManifest; +import org.opensearch.gateway.remote.RemoteClusterStateUtils; +import org.opensearch.gateway.remote.RemoteReadResult; +import org.opensearch.gateway.remote.routingtable.IndexRoutingTableInputStream; +import org.opensearch.gateway.remote.routingtable.IndexRoutingTableInputStreamReader; +import org.opensearch.index.remote.RemoteStoreEnums; +import org.opensearch.index.remote.RemoteStorePathStrategy; +import org.opensearch.index.remote.RemoteStoreUtils; +import org.opensearch.node.Node; +import org.opensearch.node.remotestore.RemoteStoreNodeAttribute; +import org.opensearch.repositories.RepositoriesService; +import org.opensearch.repositories.Repository; +import org.opensearch.repositories.blobstore.BlobStoreRepository; +import org.opensearch.threadpool.ThreadPool; + +import java.io.Closeable; +import java.io.IOException; + +import java.io.InputStream; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.ExecutorService; +import java.util.function.Function; +import java.util.function.Supplier; +import java.util.stream.Collectors; + +import static org.opensearch.common.blobstore.transfer.RemoteTransferContainer.checksumOfChecksum; +import static org.opensearch.gateway.remote.RemoteClusterStateUtils.getCusterMetadataBasePath; +import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.isRemoteRoutingTableEnabled; + +/** + * A Service which provides APIs to upload and download routing table from remote store. + * + * @opensearch.internal + */ +public class RemoteRoutingTableService implements Closeable { + + /** + * Cluster setting to specify if routing table should be published to remote store + */ + public static final Setting REMOTE_ROUTING_TABLE_ENABLED_SETTING = Setting.boolSetting( + "cluster.remote_store.routing.enabled", + true, + Setting.Property.NodeScope, + Setting.Property.Final + ); + public static final String INDEX_ROUTING_PATH_TOKEN = "index-routing"; + public static final String ROUTING_TABLE = "routing-table"; + public static final String INDEX_ROUTING_FILE_PREFIX = "index_routing"; + public static final String DELIMITER = "__"; + public static final String INDEX_ROUTING_METADATA_PREFIX = "indexRouting--"; + private static final Logger logger = LogManager.getLogger(RemoteRoutingTableService.class); + private final Settings settings; + private final Supplier repositoriesService; + private BlobStoreRepository blobStoreRepository; + private final ThreadPool threadPool; + + public RemoteRoutingTableService(Supplier repositoriesService, + Settings settings, + ThreadPool threadPool) { + assert isRemoteRoutingTableEnabled(settings) : "Remote routing table is not enabled"; + this.repositoriesService = repositoriesService; + this.settings = settings; + this.threadPool = threadPool; + } + + public List getChangedIndicesRouting( ClusterState previousClusterState, + ClusterState clusterState) { + Map previousIndexRoutingTable = previousClusterState.getRoutingTable().getIndicesRouting(); + List changedIndicesRouting = new ArrayList<>(); + for (IndexRoutingTable indexRouting : clusterState.getRoutingTable().getIndicesRouting().values()) { + if (!(previousIndexRoutingTable.containsKey(indexRouting.getIndex().getName()) && indexRouting.equals(previousIndexRoutingTable.get(indexRouting.getIndex().getName())))) { + changedIndicesRouting.add(indexRouting); + logger.info("changedIndicesRouting {}", indexRouting.prettyPrint()); + } + } + + return changedIndicesRouting; + } + + public CheckedRunnable getIndexRoutingAsyncAction( + ClusterState clusterState, + IndexRoutingTable indexRouting, + LatchedActionListener latchedActionListener + ) throws IOException { + + BlobPath custerMetadataBasePath = getCusterMetadataBasePath(blobStoreRepository, clusterState.getClusterName().value(), + clusterState.metadata().clusterUUID()).add(INDEX_ROUTING_PATH_TOKEN); + logger.info("custerMetadataBasePath {}", custerMetadataBasePath); + + BlobPath path = RemoteStoreEnums.PathType.HASHED_PREFIX.path(RemoteStorePathStrategy.PathInput.builder() + .basePath(custerMetadataBasePath) + .indexUUID(indexRouting.getIndex().getUUID()) + .build(), + RemoteStoreEnums.PathHashAlgorithm.FNV_1A_BASE64); + logger.info("path from prefix hasd {}", path); + final BlobContainer blobContainer = blobStoreRepository.blobStore().blobContainer(path); + + final String fileName = getIndexRoutingFileName(); + logger.info("fileName {}", fileName); + + ActionListener completionListener = ActionListener.wrap( + resp -> latchedActionListener.onResponse( + new ClusterMetadataManifest.UploadedIndexMetadata( + + indexRouting.getIndex().getName(), + indexRouting.getIndex().getUUID(), + path.buildAsString() + fileName, + INDEX_ROUTING_METADATA_PREFIX + ) + ), + ex -> latchedActionListener.onFailure(new RemoteClusterStateUtils.RemoteStateTransferException(indexRouting.getIndex().toString(), ex)) + ); + + if (blobContainer instanceof AsyncMultiStreamBlobContainer == false) { + logger.info("TRYING FILE UPLOAD"); + + return () -> { + logger.info("Going to upload {}", indexRouting.prettyPrint()); + + uploadIndex(indexRouting, fileName , blobContainer); + logger.info("upload done {}", indexRouting.prettyPrint()); + + completionListener.onResponse(null); + logger.info("response done {}", indexRouting.prettyPrint()); + + }; + } + + logger.info("TRYING S3 UPLOAD"); + + //TODO: Integrate with S3AsyncCrtClient for using buffered stream directly with putObject. + try ( + InputStream indexRoutingStream = new IndexRoutingTableInputStream(indexRouting); + IndexInput input = new ByteArrayIndexInput("indexrouting", indexRoutingStream.readAllBytes())) { + long expectedChecksum; + try { + expectedChecksum = checksumOfChecksum(input.clone(), 8); + } catch (Exception e) { + throw e; + } + try ( + RemoteTransferContainer remoteTransferContainer = new RemoteTransferContainer( + fileName, + fileName, + input.length(), + true, + WritePriority.URGENT, + (size, position) -> new OffsetRangeIndexInputStream(input, size, position), + expectedChecksum, + ((AsyncMultiStreamBlobContainer) blobContainer).remoteIntegrityCheckSupported() + ) + ) { + return () -> ((AsyncMultiStreamBlobContainer) blobContainer).asyncBlobUpload(remoteTransferContainer.createWriteContext(), completionListener); + } catch (IOException e) { + e.printStackTrace(); + return null; + } + } + } + + + public List getAllUploadedIndicesRouting(ClusterMetadataManifest previousManifest, List indicesRoutingToUpload, Set indicesRoutingToDelete) { + final Map allUploadedIndicesRouting = previousManifest.getIndicesRouting() + .stream() + .collect(Collectors.toMap(ClusterMetadataManifest.UploadedIndexMetadata::getIndexName, Function.identity())); + + indicesRoutingToUpload.forEach( + uploadedIndexRouting -> allUploadedIndicesRouting.put(uploadedIndexRouting.getIndexName(), uploadedIndexRouting) + ); + + indicesRoutingToDelete.forEach(index -> allUploadedIndicesRouting.remove(index)); + + logger.info("allUploadedIndicesRouting ROUTING {}", allUploadedIndicesRouting); + + return new ArrayList<>(allUploadedIndicesRouting.values()); + } + + private void uploadIndex(IndexRoutingTable indexRouting, String fileName, BlobContainer container) { + logger.info("Starting write"); + + try { + InputStream indexRoutingStream = new IndexRoutingTableInputStream(indexRouting); + container.writeBlob(fileName, indexRoutingStream, 4096, true); + logger.info("SUccessful write"); + } catch (IOException e) { + logger.error("Failed to write {}", e); + } + } + + private String getIndexRoutingFileName() { + return String.join( + DELIMITER, + INDEX_ROUTING_FILE_PREFIX, + RemoteStoreUtils.invertLong(System.currentTimeMillis()) + ); + + } + + public CheckedRunnable getAsyncIndexMetadataReadAction( + String uploadedFilename, + Index index, + LatchedActionListener latchedActionListener) { + int idx = uploadedFilename.lastIndexOf("/"); + String blobFileName = uploadedFilename.substring(idx+1); + BlobContainer blobContainer = blobStoreRepository.blobStore().blobContainer( BlobPath.cleanPath().add(uploadedFilename.substring(0,idx))); + + return () -> readAsync( + blobContainer, + blobFileName, + threadPool.executor(ThreadPool.Names.GENERIC), + ActionListener.wrap(response -> latchedActionListener.onResponse(new RemoteIndexRoutingResult(index.getName(), response.readIndexRoutingTable(index))), latchedActionListener::onFailure) + ); + } + + public void readAsync(BlobContainer blobContainer, String name, ExecutorService executorService, ActionListener listener) throws IOException { + executorService.execute(() -> { + try { + listener.onResponse(read(blobContainer, name)); + } catch (Exception e) { + logger.error("routing table download failed : ", e); + listener.onFailure(e); + } + }); + } + + public IndexRoutingTableInputStreamReader read(BlobContainer blobContainer, String path) { + try { + return new IndexRoutingTableInputStreamReader(blobContainer.readBlob(path)); + } catch (IOException e) { + logger.info("RoutingTable read failed with error: {}", e.toString()); + } + return null; + } + + @Override + public void close() throws IOException { + if (blobStoreRepository != null) { + IOUtils.close(blobStoreRepository); + } + } + + public void start() { + assert isRemoteRoutingTableEnabled(settings) == true : "Remote routing table is not enabled"; + final String remoteStoreRepo = settings.get( + Node.NODE_ATTRIBUTES.getKey() + RemoteStoreNodeAttribute.REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY + ); + assert remoteStoreRepo != null : "Remote routing table repository is not configured"; + final Repository repository = repositoriesService.get().repository(remoteStoreRepo); + assert repository instanceof BlobStoreRepository : "Repository should be instance of BlobStoreRepository"; + blobStoreRepository = (BlobStoreRepository) repository; + } + + public static class RemoteIndexRoutingResult { + String indexName; + IndexRoutingTable indexRoutingTable; + + public RemoteIndexRoutingResult(String indexName, IndexRoutingTable indexRoutingTable) { + this.indexName = indexName; + this.indexRoutingTable = indexRoutingTable; + } + + public String getIndexName() { + return indexName; + } + + public IndexRoutingTable getIndexRoutingTable() { + return indexRoutingTable; + } + } + +} diff --git a/server/src/main/java/org/opensearch/cluster/routing/remote/package-info.java b/server/src/main/java/org/opensearch/cluster/routing/remote/package-info.java new file mode 100644 index 0000000000000..9fe016e783f20 --- /dev/null +++ b/server/src/main/java/org/opensearch/cluster/routing/remote/package-info.java @@ -0,0 +1,10 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/** Package containing class to perform operations on remote routing table */ +package org.opensearch.cluster.routing.remote; diff --git a/server/src/main/java/org/opensearch/common/io/Streams.java b/server/src/main/java/org/opensearch/common/io/Streams.java index f0d8f8b8bedbf..bb70d1901e6a7 100644 --- a/server/src/main/java/org/opensearch/common/io/Streams.java +++ b/server/src/main/java/org/opensearch/common/io/Streams.java @@ -44,10 +44,12 @@ import java.io.InputStreamReader; import java.io.OutputStream; import java.io.Reader; +import java.io.SequenceInputStream; import java.io.StringWriter; import java.io.Writer; import java.nio.charset.StandardCharsets; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import java.util.Objects; import java.util.function.Consumer; @@ -221,6 +223,11 @@ public static BytesReference readFully(InputStream in) throws IOException { return out.bytes(); } + public static BytesReference readStreamList(List streams) throws IOException { + SequenceInputStream sis = new SequenceInputStream(Collections.enumeration(streams)); + return readFully(sis); + } + /** * Limits the given input stream to the provided number of bytes */ diff --git a/server/src/main/java/org/opensearch/index/translog/BufferedChecksumStreamInput.java b/server/src/main/java/org/opensearch/common/io/stream/BufferedChecksumStreamInput.java similarity index 99% rename from server/src/main/java/org/opensearch/index/translog/BufferedChecksumStreamInput.java rename to server/src/main/java/org/opensearch/common/io/stream/BufferedChecksumStreamInput.java index f75f27b7bcb91..f3341712275f9 100644 --- a/server/src/main/java/org/opensearch/index/translog/BufferedChecksumStreamInput.java +++ b/server/src/main/java/org/opensearch/common/io/stream/BufferedChecksumStreamInput.java @@ -30,7 +30,7 @@ * GitHub history for details. */ -package org.opensearch.index.translog; +package org.opensearch.common.io.stream; import org.apache.lucene.store.BufferedChecksum; import org.apache.lucene.util.BitUtil; diff --git a/server/src/main/java/org/opensearch/index/translog/BufferedChecksumStreamOutput.java b/server/src/main/java/org/opensearch/common/io/stream/BufferedChecksumStreamOutput.java similarity index 98% rename from server/src/main/java/org/opensearch/index/translog/BufferedChecksumStreamOutput.java rename to server/src/main/java/org/opensearch/common/io/stream/BufferedChecksumStreamOutput.java index 9e96664c79cc5..254f228f1c739 100644 --- a/server/src/main/java/org/opensearch/index/translog/BufferedChecksumStreamOutput.java +++ b/server/src/main/java/org/opensearch/common/io/stream/BufferedChecksumStreamOutput.java @@ -30,7 +30,7 @@ * GitHub history for details. */ -package org.opensearch.index.translog; +package org.opensearch.common.io.stream; import org.apache.lucene.store.BufferedChecksum; import org.opensearch.common.annotation.PublicApi; diff --git a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java index 828d2f97806f6..8e1f7f920e4ef 100644 --- a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java @@ -34,7 +34,6 @@ import org.apache.logging.log4j.LogManager; import org.opensearch.action.admin.cluster.configuration.TransportAddVotingConfigExclusionsAction; import org.opensearch.action.admin.indices.close.TransportCloseIndexAction; -import org.opensearch.action.main.TransportMainAction; import org.opensearch.action.search.CreatePitController; import org.opensearch.action.search.SearchRequestSlowLog; import org.opensearch.action.search.SearchRequestStats; @@ -77,6 +76,7 @@ import org.opensearch.cluster.routing.allocation.decider.SameShardAllocationDecider; import org.opensearch.cluster.routing.allocation.decider.ShardsLimitAllocationDecider; import org.opensearch.cluster.routing.allocation.decider.ThrottlingAllocationDecider; +import org.opensearch.cluster.routing.remote.RemoteRoutingTableService; import org.opensearch.cluster.service.ClusterApplierService; import org.opensearch.cluster.service.ClusterManagerService; import org.opensearch.cluster.service.ClusterManagerTaskThrottler; @@ -84,6 +84,7 @@ import org.opensearch.common.annotation.PublicApi; import org.opensearch.common.cache.CacheType; import org.opensearch.common.cache.settings.CacheSettings; +import org.opensearch.common.cache.store.settings.OpenSearchOnHeapCacheSettings; import org.opensearch.common.logging.Loggers; import org.opensearch.common.network.NetworkModule; import org.opensearch.common.network.NetworkService; @@ -102,7 +103,11 @@ import org.opensearch.gateway.DanglingIndicesState; import org.opensearch.gateway.GatewayService; import org.opensearch.gateway.PersistedClusterStateService; +import org.opensearch.gateway.remote.RemoteClusterStateCleanupManager; import org.opensearch.gateway.remote.RemoteClusterStateService; +import org.opensearch.gateway.remote.RemoteGlobalMetadataManager; +import org.opensearch.gateway.remote.RemoteIndexMetadataManager; +import org.opensearch.gateway.remote.RemoteManifestManager; import org.opensearch.http.HttpTransportSettings; import org.opensearch.index.IndexModule; import org.opensearch.index.IndexSettings; @@ -118,6 +123,7 @@ import org.opensearch.indices.IndicesQueryCache; import org.opensearch.indices.IndicesRequestCache; import org.opensearch.indices.IndicesService; +import org.opensearch.indices.RemoteStoreSettings; import org.opensearch.indices.ShardLimitValidator; import org.opensearch.indices.analysis.HunspellService; import org.opensearch.indices.breaker.BreakerSettings; @@ -614,7 +620,6 @@ public void apply(Settings value, Settings current, Settings previous) { FsHealthService.REFRESH_INTERVAL_SETTING, FsHealthService.SLOW_PATH_LOGGING_THRESHOLD_SETTING, FsHealthService.HEALTHY_TIMEOUT_SETTING, - TransportMainAction.OVERRIDE_MAIN_RESPONSE_VERSION, NodeLoadAwareAllocationDecider.CLUSTER_ROUTING_ALLOCATION_LOAD_AWARENESS_PROVISIONED_CAPACITY_SETTING, NodeLoadAwareAllocationDecider.CLUSTER_ROUTING_ALLOCATION_LOAD_AWARENESS_SKEW_FACTOR_SETTING, NodeLoadAwareAllocationDecider.CLUSTER_ROUTING_ALLOCATION_LOAD_AWARENESS_ALLOW_UNASSIGNED_PRIMARIES_SETTING, @@ -702,10 +707,12 @@ public void apply(Settings value, Settings current, Settings previous) { SearchRequestSlowLog.CLUSTER_SEARCH_REQUEST_SLOWLOG_LEVEL, // Remote cluster state settings + RemoteClusterStateCleanupManager.REMOTE_CLUSTER_STATE_CLEANUP_INTERVAL_SETTING, RemoteClusterStateService.REMOTE_CLUSTER_STATE_ENABLED_SETTING, - RemoteClusterStateService.INDEX_METADATA_UPLOAD_TIMEOUT_SETTING, - RemoteClusterStateService.GLOBAL_METADATA_UPLOAD_TIMEOUT_SETTING, - RemoteClusterStateService.METADATA_MANIFEST_UPLOAD_TIMEOUT_SETTING, + RemoteClusterStateService.REMOTE_STATE_READ_TIMEOUT_SETTING, + RemoteIndexMetadataManager.INDEX_METADATA_UPLOAD_TIMEOUT_SETTING, + RemoteGlobalMetadataManager.GLOBAL_METADATA_UPLOAD_TIMEOUT_SETTING, + RemoteManifestManager.METADATA_MANIFEST_UPLOAD_TIMEOUT_SETTING, RemoteStoreNodeService.REMOTE_STORE_COMPATIBILITY_MODE_SETTING, RemoteStoreNodeService.MIGRATION_DIRECTION_SETTING, IndicesService.CLUSTER_REMOTE_TRANSLOG_BUFFER_INTERVAL_SETTING, @@ -723,8 +730,10 @@ public void apply(Settings value, Settings current, Settings previous) { CpuBasedAdmissionControllerSettings.CLUSTER_ADMIN_CPU_USAGE_LIMIT, IoBasedAdmissionControllerSettings.IO_BASED_ADMISSION_CONTROLLER_TRANSPORT_LAYER_MODE, IoBasedAdmissionControllerSettings.SEARCH_IO_USAGE_LIMIT, - IoBasedAdmissionControllerSettings.INDEXING_IO_USAGE_LIMIT + IoBasedAdmissionControllerSettings.INDEXING_IO_USAGE_LIMIT, + // Remote Routing table settings + RemoteRoutingTableService.REMOTE_ROUTING_TABLE_ENABLED_SETTING ) ) ); diff --git a/server/src/main/java/org/opensearch/common/settings/FeatureFlagSettings.java b/server/src/main/java/org/opensearch/common/settings/FeatureFlagSettings.java index 0e1b353fb41b1..9e5c9cf00f7d0 100644 --- a/server/src/main/java/org/opensearch/common/settings/FeatureFlagSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/FeatureFlagSettings.java @@ -36,6 +36,7 @@ protected FeatureFlagSettings( FeatureFlags.DATETIME_FORMATTER_CACHING_SETTING, FeatureFlags.WRITEABLE_REMOTE_INDEX_SETTING, FeatureFlags.PLUGGABLE_CACHE_SETTING, - FeatureFlags.REMOTE_STORE_MIGRATION_EXPERIMENTAL_SETTING + FeatureFlags.REMOTE_STORE_MIGRATION_EXPERIMENTAL_SETTING, + FeatureFlags.REMOTE_ROUTING_TABLE_EXPERIMENTAL_SETTING ); } diff --git a/server/src/main/java/org/opensearch/common/util/FeatureFlags.java b/server/src/main/java/org/opensearch/common/util/FeatureFlags.java index 8633cf1fe25ea..196b180eb7221 100644 --- a/server/src/main/java/org/opensearch/common/util/FeatureFlags.java +++ b/server/src/main/java/org/opensearch/common/util/FeatureFlags.java @@ -12,9 +12,11 @@ import org.opensearch.common.settings.Setting.Property; import org.opensearch.common.settings.Settings; +import java.util.List; + /** * Utility class to manage feature flags. Feature flags are system properties that must be set on the JVM. - * These are used to gate the visibility/availability of incomplete features. Fore more information, see + * These are used to gate the visibility/availability of incomplete features. For more information, see * https://featureflags.io/feature-flag-introduction/ * * @opensearch.internal @@ -48,6 +50,18 @@ public class FeatureFlags { */ public static final String TELEMETRY = "opensearch.experimental.feature.telemetry.enabled"; + /** + * Gates the functionality of remote routing table. + */ + public static final String REMOTE_ROUTING_TABLE_EXPERIMENTAL = "opensearch.experimental.feature.remote_store.routing.enabled"; + + + public static final Setting REMOTE_ROUTING_TABLE_EXPERIMENTAL_SETTING = Setting.boolSetting( + REMOTE_ROUTING_TABLE_EXPERIMENTAL, + true, + Property.NodeScope + ); + /** * Gates the optimization of datetime formatters caching along with change in default datetime formatter. */ diff --git a/server/src/main/java/org/opensearch/discovery/DiscoveryModule.java b/server/src/main/java/org/opensearch/discovery/DiscoveryModule.java index 288371aa240a0..2edb9a61a85a8 100644 --- a/server/src/main/java/org/opensearch/discovery/DiscoveryModule.java +++ b/server/src/main/java/org/opensearch/discovery/DiscoveryModule.java @@ -52,6 +52,7 @@ import org.opensearch.core.common.io.stream.NamedWriteableRegistry; import org.opensearch.core.common.transport.TransportAddress; import org.opensearch.gateway.GatewayMetaState; +import org.opensearch.gateway.remote.RemoteClusterStateService; import org.opensearch.monitor.NodeHealthService; import org.opensearch.node.remotestore.RemoteStoreNodeService; import org.opensearch.plugins.DiscoveryPlugin; @@ -133,7 +134,8 @@ public DiscoveryModule( RerouteService rerouteService, NodeHealthService nodeHealthService, PersistedStateRegistry persistedStateRegistry, - RemoteStoreNodeService remoteStoreNodeService + RemoteStoreNodeService remoteStoreNodeService, + RemoteClusterStateService remoteClusterStateService ) { final Collection> joinValidators = new ArrayList<>(); final Map> hostProviders = new HashMap<>(); @@ -211,7 +213,8 @@ public DiscoveryModule( electionStrategy, nodeHealthService, persistedStateRegistry, - remoteStoreNodeService + remoteStoreNodeService, + remoteClusterStateService ); } else { throw new IllegalArgumentException("Unknown discovery type [" + discoveryType + "]"); diff --git a/server/src/main/java/org/opensearch/gateway/remote/AbstractRemoteBlobStoreObject.java b/server/src/main/java/org/opensearch/gateway/remote/AbstractRemoteBlobStoreObject.java new file mode 100644 index 0000000000000..542a7074528a6 --- /dev/null +++ b/server/src/main/java/org/opensearch/gateway/remote/AbstractRemoteBlobStoreObject.java @@ -0,0 +1,114 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.gateway.remote; + +import static org.opensearch.gateway.remote.RemoteClusterStateUtils.PATH_DELIMITER; + +import java.io.IOException; +import java.io.InputStream; +import java.util.Arrays; +import java.util.concurrent.ExecutorService; +import org.opensearch.common.CheckedRunnable; +import org.opensearch.common.blobstore.BlobPath; +import org.opensearch.common.blobstore.stream.write.WritePriority; +import org.opensearch.core.action.ActionListener; +import org.opensearch.core.compress.Compressor; +import org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedMetadata; +import org.opensearch.index.translog.transfer.BlobStoreTransferService; +import org.opensearch.repositories.blobstore.BlobStoreRepository; +import org.opensearch.threadpool.ThreadPool; + +public abstract class AbstractRemoteBlobStoreObject implements RemoteObject { + + private final BlobStoreTransferService transferService; + private final BlobStoreRepository blobStoreRepository; + private final String clusterName; + private final ExecutorService executorService; + + public AbstractRemoteBlobStoreObject(BlobStoreTransferService blobStoreTransferService, BlobStoreRepository blobStoreRepository, String clusterName, + ThreadPool threadPool) { + this.transferService = blobStoreTransferService; + this.blobStoreRepository = blobStoreRepository; + this.clusterName = clusterName; + this.executorService = threadPool.executor(ThreadPool.Names.GENERIC); + } + + public abstract BlobPathParameters getBlobPathParameters(); + + public abstract String getFullBlobName(); + + public String getBlobFileName() { + if (getFullBlobName() == null) { + generateBlobFileName(); + } + String[] pathTokens = getFullBlobName().split(PATH_DELIMITER); + return getFullBlobName().split(PATH_DELIMITER)[pathTokens.length - 1]; + } + + public abstract String generateBlobFileName(); + + public abstract UploadedMetadata getUploadedMetadata(); + + @Override + public CheckedRunnable writeAsync(ActionListener listener) { + return () -> { + assert get() != null; + InputStream inputStream = serialize(); + transferService.uploadBlob(inputStream, getBlobPathForUpload(), getBlobFileName(), WritePriority.URGENT, listener); + }; + } + + @Override + public T read() throws IOException { + assert getFullBlobName() != null; + return deserialize( + transferService.downloadBlob(getBlobPathForDownload(), getBlobFileName())); + } + + @Override + public void readAsync(ActionListener listener) { + executorService.execute(() -> { + try { + listener.onResponse(read()); + } catch (Exception e) { + listener.onFailure(e); + } + }); + } + + public BlobPath getBlobPathForUpload() { + BlobPath blobPath = blobStoreRepository.basePath().add(RemoteClusterStateUtils.encodeString(clusterName)).add("cluster-state").add(clusterUUID()); + for (String token : getBlobPathParameters().getPathTokens()) { + blobPath = blobPath.add(token); + } + return blobPath; + } + + public BlobPath getBlobPathForDownload() { + String[] pathTokens = extractBlobPathTokens(getFullBlobName()); + BlobPath blobPath = blobStoreRepository.basePath(); + for (String token : pathTokens) { + blobPath = blobPath.add(token); + } + return blobPath; + } + + protected Compressor getCompressor() { + return blobStoreRepository.getCompressor(); + } + + protected BlobStoreRepository getBlobStoreRepository() { + return this.blobStoreRepository; + } + + private static String[] extractBlobPathTokens(String blobName) { + String[] blobNameTokens = blobName.split(PATH_DELIMITER); + return Arrays.copyOfRange(blobNameTokens, 0, blobNameTokens.length - 1); + } +} diff --git a/server/src/main/java/org/opensearch/gateway/remote/BlobPathParameters.java b/server/src/main/java/org/opensearch/gateway/remote/BlobPathParameters.java new file mode 100644 index 0000000000000..4b47bd744ef1a --- /dev/null +++ b/server/src/main/java/org/opensearch/gateway/remote/BlobPathParameters.java @@ -0,0 +1,30 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.gateway.remote; + +import java.util.List; + +public class BlobPathParameters { + + private List pathTokens; + private String filePrefix; + + public BlobPathParameters(List pathTokens, String filePrefix) { + this.pathTokens = pathTokens; + this.filePrefix = filePrefix; + } + + public List getPathTokens() { + return pathTokens; + } + + public String getFilePrefix() { + return filePrefix; + } +} diff --git a/server/src/main/java/org/opensearch/gateway/remote/ClusterMetadataManifest.java b/server/src/main/java/org/opensearch/gateway/remote/ClusterMetadataManifest.java index 4725f40076ce2..aa179092949e3 100644 --- a/server/src/main/java/org/opensearch/gateway/remote/ClusterMetadataManifest.java +++ b/server/src/main/java/org/opensearch/gateway/remote/ClusterMetadataManifest.java @@ -8,6 +8,8 @@ package org.opensearch.gateway.remote; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.opensearch.Version; import org.opensearch.core.ParseField; import org.opensearch.core.common.Strings; @@ -16,6 +18,7 @@ import org.opensearch.core.common.io.stream.Writeable; import org.opensearch.core.xcontent.ConstructingObjectParser; import org.opensearch.core.xcontent.MediaTypeRegistry; +import org.opensearch.core.xcontent.ObjectParser; import org.opensearch.core.xcontent.ToXContentFragment; import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.core.xcontent.XContentParser; @@ -23,8 +26,12 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Collections; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.Objects; +import java.util.function.Function; +import java.util.stream.Collectors; /** * Manifest file which contains the details of the uploaded entity metadata @@ -35,6 +42,8 @@ public class ClusterMetadataManifest implements Writeable, ToXContentFragment { public static final int CODEC_V0 = 0; // Older codec version, where we haven't introduced codec versions for manifest. public static final int CODEC_V1 = 1; // In Codec V1 we have introduced global-metadata and codec version in Manifest file. + public static final int CODEC_V2 = 2; // In Codec V2, there are seperate metadata files rather than a single global metadata file. + public static final int CODEC_V3 = 3; // In Codec V3, we introduced index routing-metadata, diff as part of manifest. private static final ParseField CLUSTER_TERM_FIELD = new ParseField("cluster_term"); private static final ParseField STATE_VERSION_FIELD = new ParseField("state_version"); @@ -48,6 +57,53 @@ public class ClusterMetadataManifest implements Writeable, ToXContentFragment { private static final ParseField INDICES_FIELD = new ParseField("indices"); private static final ParseField PREVIOUS_CLUSTER_UUID = new ParseField("previous_cluster_uuid"); private static final ParseField CLUSTER_UUID_COMMITTED = new ParseField("cluster_uuid_committed"); + private static final ParseField METADATA_VERSION = new ParseField("metadata_version"); + private static final ParseField UPLOADED_COORDINATOR_METADATA = new ParseField("uploaded_coordinator_metadata"); + private static final ParseField UPLOADED_SETTINGS_METADATA = new ParseField("uploaded_settings_metadata"); + private static final ParseField UPLOADED_TEMPLATES_METADATA = new ParseField("uploaded_templates_metadata"); + private static final ParseField UPLOADED_CUSTOM_METADATA = new ParseField("uploaded_custom_metadata"); + private static final ParseField UPLOADED_DISCOVERY_NODES_METADATA = new ParseField("uploaded_discovery_nodes_metadata"); + private static final ParseField UPLOADED_CLUSTER_BLOCKS_METADATA = new ParseField("uploaded_cluster_blocks_metadata"); + private static final ParseField DIFF_MANIFEST = new ParseField("diff_manifest"); + private static final ParseField ROUTING_TABLE_VERSION_FIELD = new ParseField("routing_table_version"); + private static final ParseField INDICES_ROUTING_FIELD = new ParseField("indices_routing"); + + private static ClusterMetadataManifest.Builder manifestV0Builder(Object[] fields) { + return ClusterMetadataManifest.builder() + .clusterTerm(term(fields)) + .stateVersion(version(fields)) + .clusterUUID(clusterUUID(fields)) + .stateUUID(stateUUID(fields)) + .opensearchVersion(opensearchVersion(fields)) + .nodeId(nodeId(fields)) + .committed(committed(fields)) + .codecVersion(CODEC_V0) + .indices(indices(fields)) + .previousClusterUUID(previousClusterUUID(fields)) + .clusterUUIDCommitted(clusterUUIDCommitted(fields)); + } + + private static ClusterMetadataManifest.Builder manifestV1Builder(Object[] fields) { + return manifestV0Builder(fields).codecVersion(codecVersion(fields)).globalMetadataFileName(globalMetadataFileName(fields)); + } + + private static ClusterMetadataManifest.Builder manifestV2Builder(Object[] fields) { + return manifestV0Builder(fields).codecVersion(codecVersion(fields)) + .coordinationMetadata(coordinationMetadata(fields)) + .settingMetadata(settingsMetadata(fields)) + .templatesMetadata(templatesMetadata(fields)) + .customMetadataMap(customMetadata(fields)); + } + + private static ClusterMetadataManifest.Builder manifestV3Builder(Object[] fields) { + return manifestV2Builder(fields) + .discoveryNodesMetadata(discoveryNodesMetadata(fields)) + .clusterBlocksMetadata(clusterBlocksMetadata(fields)) + .diffManifest(diffManifest(fields)) + .routingTableVersion(routingTableVersion(fields)) + .indicesRouting(indicesRouting(fields)) + .metadataVersion(metadataVersion(fields)); + } private static long term(Object[] fields) { return (long) fields[0]; @@ -97,47 +153,74 @@ private static String globalMetadataFileName(Object[] fields) { return (String) fields[11]; } + private static UploadedMetadataAttribute coordinationMetadata(Object[] fields) { + return (UploadedMetadataAttribute) fields[11]; + } + + private static UploadedMetadataAttribute settingsMetadata(Object[] fields) { + return (UploadedMetadataAttribute) fields[12]; + } + + private static UploadedMetadataAttribute templatesMetadata(Object[] fields) { + return (UploadedMetadataAttribute) fields[13]; + } + + private static Map customMetadata(Object[] fields) { + List customs = (List) fields[14]; + return customs.stream().collect(Collectors.toMap(UploadedMetadataAttribute::getAttributeName, Function.identity())); + } + + private static UploadedMetadataAttribute discoveryNodesMetadata(Object[] fields) { + return (UploadedMetadataAttribute) fields[15]; + } + + private static UploadedMetadataAttribute clusterBlocksMetadata(Object[] fields) { + return (UploadedMetadataAttribute) fields[16]; + } + + private static ClusterStateDiffManifest diffManifest(Object[] fields) { + return (ClusterStateDiffManifest) fields[17]; + } + + private static long routingTableVersion(Object[] fields) { + return (long) fields[18]; + } + + private static List indicesRouting(Object[] fields) { + return (List) fields[19]; + } + + private static long metadataVersion(Object[] fields) { + return (long) fields[20]; + } + private static final ConstructingObjectParser PARSER_V0 = new ConstructingObjectParser<>( "cluster_metadata_manifest", - fields -> new ClusterMetadataManifest( - term(fields), - version(fields), - clusterUUID(fields), - stateUUID(fields), - opensearchVersion(fields), - nodeId(fields), - committed(fields), - CODEC_V0, - null, - indices(fields), - previousClusterUUID(fields), - clusterUUIDCommitted(fields) - ) + fields -> manifestV0Builder(fields).build() ); private static final ConstructingObjectParser PARSER_V1 = new ConstructingObjectParser<>( "cluster_metadata_manifest", - fields -> new ClusterMetadataManifest( - term(fields), - version(fields), - clusterUUID(fields), - stateUUID(fields), - opensearchVersion(fields), - nodeId(fields), - committed(fields), - codecVersion(fields), - globalMetadataFileName(fields), - indices(fields), - previousClusterUUID(fields), - clusterUUIDCommitted(fields) - ) + fields -> manifestV1Builder(fields).build() ); - private static final ConstructingObjectParser CURRENT_PARSER = PARSER_V1; + private static final ConstructingObjectParser PARSER_V2 = new ConstructingObjectParser<>( + "cluster_metadata_manifest", + fields -> manifestV2Builder(fields).build() + ); + + private static final ConstructingObjectParser PARSER_V3 = new ConstructingObjectParser<>( + "cluster_metadata_manifest", + fields -> manifestV3Builder(fields).build() + ); + + private static final ConstructingObjectParser CURRENT_PARSER = PARSER_V3; static { declareParser(PARSER_V0, CODEC_V0); declareParser(PARSER_V1, CODEC_V1); + declareParser(PARSER_V2, CODEC_V2); + declareParser(PARSER_V3, CODEC_V3); } private static void declareParser(ConstructingObjectParser parser, long codec_version) { @@ -156,14 +239,67 @@ private static void declareParser(ConstructingObjectParser= CODEC_V1) { + if (codec_version == CODEC_V1) { parser.declareInt(ConstructingObjectParser.constructorArg(), CODEC_VERSION_FIELD); parser.declareString(ConstructingObjectParser.constructorArg(), GLOBAL_METADATA_FIELD); + } else if (codec_version >= CODEC_V2) { + parser.declareInt(ConstructingObjectParser.constructorArg(), CODEC_VERSION_FIELD); + parser.declareNamedObject( + ConstructingObjectParser.optionalConstructorArg(), + UploadedMetadataAttribute.PARSER, + UPLOADED_COORDINATOR_METADATA + ); + parser.declareNamedObject( + ConstructingObjectParser.optionalConstructorArg(), + UploadedMetadataAttribute.PARSER, + UPLOADED_SETTINGS_METADATA + ); + parser.declareNamedObject( + ConstructingObjectParser.optionalConstructorArg(), + UploadedMetadataAttribute.PARSER, + UPLOADED_TEMPLATES_METADATA + ); + parser.declareNamedObjects( + ConstructingObjectParser.optionalConstructorArg(), + UploadedMetadataAttribute.PARSER, + UPLOADED_CUSTOM_METADATA + ); + } + if (codec_version >= CODEC_V3) { + parser.declareNamedObject( + ConstructingObjectParser.optionalConstructorArg(), + UploadedMetadataAttribute.PARSER, + UPLOADED_DISCOVERY_NODES_METADATA + ); + parser.declareNamedObject( + ConstructingObjectParser.optionalConstructorArg(), + UploadedMetadataAttribute.PARSER, + UPLOADED_CLUSTER_BLOCKS_METADATA + ); + parser.declareObject( + ConstructingObjectParser.constructorArg(), + (p, c) -> ClusterStateDiffManifest.fromXContent(p), + DIFF_MANIFEST + ); + parser.declareLong(ConstructingObjectParser.constructorArg(), ROUTING_TABLE_VERSION_FIELD); + parser.declareObjectArray( + ConstructingObjectParser.constructorArg(), + (p, c) -> UploadedIndexMetadata.fromXContent(p), + INDICES_ROUTING_FIELD + ); + parser.declareLong(ConstructingObjectParser.constructorArg(), METADATA_VERSION); } } private final int codecVersion; private final String globalMetadataFileName; + private final long metadataVersion; + private final UploadedMetadataAttribute uploadedCoordinationMetadata; + private final UploadedMetadataAttribute uploadedSettingsMetadata; + private final UploadedMetadataAttribute uploadedTemplatesMetadata; + private final UploadedMetadataAttribute uploadedDiscoveryNodesMetadata; + private final UploadedMetadataAttribute uploadedClusterBlocksMetadata; + private final Map uploadedCustomMetadataMap; private final List indices; private final long clusterTerm; private final long stateVersion; @@ -174,6 +310,9 @@ private static void declareParser(ConstructingObjectParser indicesRouting; public List getIndices() { return indices; @@ -223,6 +362,54 @@ public String getGlobalMetadataFileName() { return globalMetadataFileName; } + private static final Logger logger = LogManager.getLogger(ClusterMetadataManifest.class); + + public long getMetadataVersion() { + return metadataVersion; + } + + public UploadedMetadataAttribute getCoordinationMetadata() { + return uploadedCoordinationMetadata; + } + + public UploadedMetadataAttribute getSettingsMetadata() { + return uploadedSettingsMetadata; + } + + public UploadedMetadataAttribute getTemplatesMetadata() { + return uploadedTemplatesMetadata; + } + + public UploadedMetadataAttribute getDiscoveryNodesMetadata() { + return uploadedDiscoveryNodesMetadata; + } + + public UploadedMetadataAttribute getClusterBlocksMetadata() { + return uploadedClusterBlocksMetadata; + } + + public ClusterStateDiffManifest getDiffManifest() { + return diffManifest; + } + + public Map getCustomMetadataMap() { + return uploadedCustomMetadataMap; + } + + public long getRoutingTableVersion() { + return routingTableVersion; + } + + public List getIndicesRouting() { + return indicesRouting; + } + public boolean hasMetadataAttributesFiles() { + return uploadedCoordinationMetadata != null + || uploadedSettingsMetadata != null + || uploadedTemplatesMetadata != null + || !uploadedCustomMetadataMap.isEmpty(); + } + public ClusterMetadataManifest( long clusterTerm, long version, @@ -235,7 +422,17 @@ public ClusterMetadataManifest( String globalMetadataFileName, List indices, String previousClusterUUID, - boolean clusterUUIDCommitted + boolean clusterUUIDCommitted, + UploadedMetadataAttribute uploadedCoordinationMetadata, + UploadedMetadataAttribute uploadedSettingsMetadata, + UploadedMetadataAttribute uploadedTemplatesMetadata, + Map uploadedCustomMetadataMap, + UploadedMetadataAttribute discoveryNodesMetadata, + UploadedMetadataAttribute clusterBlocksMetadata, + ClusterStateDiffManifest diffManifest, + long routingTableVersion, + List indicesRouting, + long metadataVersion ) { this.clusterTerm = clusterTerm; this.stateVersion = version; @@ -249,6 +446,18 @@ public ClusterMetadataManifest( this.indices = Collections.unmodifiableList(indices); this.previousClusterUUID = previousClusterUUID; this.clusterUUIDCommitted = clusterUUIDCommitted; + this.uploadedCoordinationMetadata = uploadedCoordinationMetadata; + this.uploadedSettingsMetadata = uploadedSettingsMetadata; + this.uploadedTemplatesMetadata = uploadedTemplatesMetadata; + this.uploadedCustomMetadataMap = Collections.unmodifiableMap( + uploadedCustomMetadataMap != null ? uploadedCustomMetadataMap : new HashMap<>() + ); + this.uploadedDiscoveryNodesMetadata = discoveryNodesMetadata; + this.uploadedClusterBlocksMetadata = clusterBlocksMetadata; + this.diffManifest = diffManifest; + this.routingTableVersion = routingTableVersion; + this.indicesRouting = Collections.unmodifiableList(indicesRouting); + this.metadataVersion = metadataVersion; } public ClusterMetadataManifest(StreamInput in) throws IOException { @@ -262,12 +471,49 @@ public ClusterMetadataManifest(StreamInput in) throws IOException { this.indices = Collections.unmodifiableList(in.readList(UploadedIndexMetadata::new)); this.previousClusterUUID = in.readString(); this.clusterUUIDCommitted = in.readBoolean(); - if (in.getVersion().onOrAfter(Version.V_2_12_0)) { + + if (in.getVersion().onOrAfter(Version.V_3_0_0)) { + this.codecVersion = in.readInt(); + this.uploadedCoordinationMetadata = new UploadedMetadataAttribute(in); + this.uploadedSettingsMetadata = new UploadedMetadataAttribute(in); + this.uploadedTemplatesMetadata = new UploadedMetadataAttribute(in); + this.uploadedCustomMetadataMap = Collections.unmodifiableMap( + in.readMap(StreamInput::readString, UploadedMetadataAttribute::new) + ); + this.globalMetadataFileName = null; + // ToDo: change the version check and initialize these + this.uploadedDiscoveryNodesMetadata = null; + this.uploadedClusterBlocksMetadata = null; + this.diffManifest = null; + this.routingTableVersion = in.readLong(); + this.indicesRouting = Collections.unmodifiableList(in.readList(UploadedIndexMetadata::new)); + this.metadataVersion = in.readLong(); + } else if (in.getVersion().onOrAfter(Version.V_2_12_0)) { this.codecVersion = in.readInt(); this.globalMetadataFileName = in.readString(); + this.uploadedCoordinationMetadata = null; + this.uploadedSettingsMetadata = null; + this.uploadedTemplatesMetadata = null; + this.uploadedCustomMetadataMap = null; + this.routingTableVersion = -1; + this.indicesRouting = null; + this.uploadedDiscoveryNodesMetadata = null; + this.uploadedClusterBlocksMetadata = null; + this.diffManifest = null; + this.metadataVersion = -1; } else { this.codecVersion = CODEC_V0; // Default codec this.globalMetadataFileName = null; + this.uploadedCoordinationMetadata = null; + this.uploadedSettingsMetadata = null; + this.uploadedTemplatesMetadata = null; + this.uploadedCustomMetadataMap = null; + this.routingTableVersion = -1; + this.indicesRouting = null; + this.uploadedDiscoveryNodesMetadata = null; + this.uploadedClusterBlocksMetadata = null; + this.diffManifest = null; + this.metadataVersion = -1; } } @@ -281,6 +527,7 @@ public static Builder builder(ClusterMetadataManifest manifest) { @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + logger.info("CLUSTER_TERM_FIELD {} : {}", CLUSTER_TERM_FIELD.getPreferredName(), getClusterTerm()); builder.field(CLUSTER_TERM_FIELD.getPreferredName(), getClusterTerm()) .field(STATE_VERSION_FIELD.getPreferredName(), getStateVersion()) .field(CLUSTER_UUID_FIELD.getPreferredName(), getClusterUUID()) @@ -291,16 +538,68 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.startArray(INDICES_FIELD.getPreferredName()); { for (UploadedIndexMetadata uploadedIndexMetadata : indices) { + builder.startObject(); uploadedIndexMetadata.toXContent(builder, params); + builder.endObject(); } } builder.endArray(); builder.field(PREVIOUS_CLUSTER_UUID.getPreferredName(), getPreviousClusterUUID()); builder.field(CLUSTER_UUID_COMMITTED.getPreferredName(), isClusterUUIDCommitted()); - if (onOrAfterCodecVersion(CODEC_V1)) { + if (onOrAfterCodecVersion(CODEC_V2)) { + builder.field(CODEC_VERSION_FIELD.getPreferredName(), getCodecVersion()); + if (getCoordinationMetadata() != null) { + builder.startObject(UPLOADED_COORDINATOR_METADATA.getPreferredName()); + getCoordinationMetadata().toXContent(builder, params); + builder.endObject(); + } + if (getSettingsMetadata() != null) { + builder.startObject(UPLOADED_SETTINGS_METADATA.getPreferredName()); + getSettingsMetadata().toXContent(builder, params); + builder.endObject(); + } + if (getTemplatesMetadata() != null) { + builder.startObject(UPLOADED_TEMPLATES_METADATA.getPreferredName()); + getTemplatesMetadata().toXContent(builder, params); + builder.endObject(); + } + builder.startObject(UPLOADED_CUSTOM_METADATA.getPreferredName()); + for (UploadedMetadataAttribute attribute : getCustomMetadataMap().values()) { + attribute.toXContent(builder, params); + } + builder.endObject(); + } else if (onOrAfterCodecVersion(CODEC_V1)) { builder.field(CODEC_VERSION_FIELD.getPreferredName(), getCodecVersion()); builder.field(GLOBAL_METADATA_FIELD.getPreferredName(), getGlobalMetadataFileName()); } + if (onOrAfterCodecVersion(CODEC_V3)) { + if (getDiscoveryNodesMetadata() != null) { + builder.startObject(UPLOADED_DISCOVERY_NODES_METADATA.getPreferredName()); + getDiscoveryNodesMetadata().toXContent(builder, params); + builder.endObject(); + } + if (getClusterBlocksMetadata() != null) { + builder.startObject(UPLOADED_CLUSTER_BLOCKS_METADATA.getPreferredName()); + getClusterBlocksMetadata().toXContent(builder, params); + builder.endObject(); + } + if (getDiffManifest() != null) { + builder.startObject(DIFF_MANIFEST.getPreferredName()); + getDiffManifest().toXContent(builder, params); + builder.endObject(); + } + builder.field(METADATA_VERSION.getPreferredName(), getMetadataVersion()); + builder.field(ROUTING_TABLE_VERSION_FIELD.getPreferredName(), getRoutingTableVersion()); + builder.startArray(INDICES_ROUTING_FIELD.getPreferredName()); + { + for (UploadedIndexMetadata uploadedIndexMetadata : indicesRouting) { + builder.startObject(); + uploadedIndexMetadata.toXContent(builder, params); + builder.endObject(); + } + } + builder.endArray(); + } return builder; } @@ -316,7 +615,16 @@ public void writeTo(StreamOutput out) throws IOException { out.writeCollection(indices); out.writeString(previousClusterUUID); out.writeBoolean(clusterUUIDCommitted); - if (out.getVersion().onOrAfter(Version.V_2_12_0)) { + if (out.getVersion().onOrAfter(Version.V_3_0_0)) { + out.writeInt(codecVersion); + uploadedCoordinationMetadata.writeTo(out); + uploadedSettingsMetadata.writeTo(out); + uploadedTemplatesMetadata.writeTo(out); + out.writeMap(uploadedCustomMetadataMap, StreamOutput::writeString, (o, v) -> v.writeTo(o)); + out.writeLong(routingTableVersion); + out.writeCollection(indicesRouting); + out.writeLong(metadataVersion); + } else if (out.getVersion().onOrAfter(Version.V_2_12_0)) { out.writeInt(codecVersion); out.writeString(globalMetadataFileName); } @@ -342,7 +650,9 @@ public boolean equals(Object o) { && Objects.equals(previousClusterUUID, that.previousClusterUUID) && Objects.equals(clusterUUIDCommitted, that.clusterUUIDCommitted) && Objects.equals(globalMetadataFileName, that.globalMetadataFileName) - && Objects.equals(codecVersion, that.codecVersion); + && Objects.equals(codecVersion, that.codecVersion) + && Objects.equals(routingTableVersion, that.routingTableVersion) + && Objects.equals(indicesRouting, that.indicesRouting); } @Override @@ -359,7 +669,9 @@ public int hashCode() { nodeId, committed, previousClusterUUID, - clusterUUIDCommitted + clusterUUIDCommitted, + routingTableVersion, + indicesRouting ); } @@ -376,6 +688,10 @@ public static ClusterMetadataManifest fromXContentV0(XContentParser parser) thro return PARSER_V0.parse(parser, null); } + public static ClusterMetadataManifest fromXContentV1(XContentParser parser) throws IOException { + return PARSER_V1.parse(parser, null); + } + public static ClusterMetadataManifest fromXContent(XContentParser parser) throws IOException { return CURRENT_PARSER.parse(parser, null); } @@ -388,6 +704,13 @@ public static ClusterMetadataManifest fromXContent(XContentParser parser) throws public static class Builder { private String globalMetadataFileName; + private long metadataVersion; + private UploadedMetadataAttribute discoveryNodesMetadata; + private UploadedMetadataAttribute clusterBlocksMetadata; + private UploadedMetadataAttribute coordinationMetadata; + private UploadedMetadataAttribute settingsMetadata; + private UploadedMetadataAttribute templatesMetadata; + private Map customMetadataMap; private int codecVersion; private List indices; private long clusterTerm; @@ -399,12 +722,30 @@ public static class Builder { private String previousClusterUUID; private boolean committed; private boolean clusterUUIDCommitted; + private ClusterStateDiffManifest diffManifest; + private long routingTableVersion; + private List indicesRouting; public Builder indices(List indices) { this.indices = indices; return this; } + public Builder routingTableVersion(long routingTableVersion) { + this.routingTableVersion = routingTableVersion; + return this; + } + + public Builder metadataVersion(long metadataVersion) { + this.metadataVersion = metadataVersion; + return this; + } + + public Builder indicesRouting(List indicesRouting) { + this.indicesRouting = indicesRouting; + return this; + } + public Builder codecVersion(int codecVersion) { this.codecVersion = codecVersion; return this; @@ -415,6 +756,31 @@ public Builder globalMetadataFileName(String globalMetadataFileName) { return this; } + public Builder coordinationMetadata(UploadedMetadataAttribute coordinationMetadata) { + this.coordinationMetadata = coordinationMetadata; + return this; + } + + public Builder settingMetadata(UploadedMetadataAttribute settingsMetadata) { + this.settingsMetadata = settingsMetadata; + return this; + } + + public Builder templatesMetadata(UploadedMetadataAttribute templatesMetadata) { + this.templatesMetadata = templatesMetadata; + return this; + } + + public Builder customMetadataMap(Map customMetadataMap) { + this.customMetadataMap = customMetadataMap; + return this; + } + + public Builder put(String custom, UploadedMetadataAttribute customMetadata) { + this.customMetadataMap.put(custom, customMetadata); + return this; + } + public Builder clusterTerm(long clusterTerm) { this.clusterTerm = clusterTerm; return this; @@ -454,6 +820,10 @@ public List getIndices() { return indices; } + public List getIndicesRouting() { + return indicesRouting; + } + public Builder previousClusterUUID(String previousClusterUUID) { this.previousClusterUUID = previousClusterUUID; return this; @@ -464,8 +834,24 @@ public Builder clusterUUIDCommitted(boolean clusterUUIDCommitted) { return this; } + public Builder discoveryNodesMetadata(UploadedMetadataAttribute discoveryNodesMetadata) { + this.discoveryNodesMetadata = discoveryNodesMetadata; + return this; + } + + public Builder clusterBlocksMetadata(UploadedMetadataAttribute clusterBlocksMetadata) { + this.clusterBlocksMetadata = clusterBlocksMetadata; + return this; + } + + public Builder diffManifest(ClusterStateDiffManifest diffManifest) { + this.diffManifest = diffManifest; + return this; + } + public Builder() { indices = new ArrayList<>(); + customMetadataMap = new HashMap<>(); } public Builder(ClusterMetadataManifest manifest) { @@ -477,10 +863,17 @@ public Builder(ClusterMetadataManifest manifest) { this.nodeId = manifest.nodeId; this.committed = manifest.committed; this.globalMetadataFileName = manifest.globalMetadataFileName; + this.coordinationMetadata = manifest.uploadedCoordinationMetadata; + this.settingsMetadata = manifest.uploadedSettingsMetadata; + this.templatesMetadata = manifest.uploadedTemplatesMetadata; + this.customMetadataMap = manifest.uploadedCustomMetadataMap; this.codecVersion = manifest.codecVersion; this.indices = new ArrayList<>(manifest.indices); this.previousClusterUUID = manifest.previousClusterUUID; this.clusterUUIDCommitted = manifest.clusterUUIDCommitted; + this.diffManifest = manifest.diffManifest; + this.routingTableVersion = manifest.routingTableVersion; + this.indicesRouting = new ArrayList<>(manifest.indicesRouting); } public ClusterMetadataManifest build() { @@ -496,22 +889,52 @@ public ClusterMetadataManifest build() { globalMetadataFileName, indices, previousClusterUUID, - clusterUUIDCommitted + clusterUUIDCommitted, + coordinationMetadata, + settingsMetadata, + templatesMetadata, + customMetadataMap, + discoveryNodesMetadata, + clusterBlocksMetadata, + diffManifest, + routingTableVersion, + indicesRouting, + metadataVersion ); } } + /** + * Interface representing uploaded metadata + */ + public interface UploadedMetadata { + /** + * Gets the component or part of the system this upload belongs to. + * + * @return A string identifying the component + */ + String getComponent(); + + /** + * Gets the name of the file that was uploaded + * + * @return The name of the uploaded file as a string + */ + String getUploadedFilename(); + } + /** * Metadata for uploaded index metadata * * @opensearch.internal */ - public static class UploadedIndexMetadata implements Writeable, ToXContentFragment { + public static class UploadedIndexMetadata implements UploadedMetadata, Writeable, ToXContentFragment { private static final ParseField INDEX_NAME_FIELD = new ParseField("index_name"); private static final ParseField INDEX_UUID_FIELD = new ParseField("index_uuid"); private static final ParseField UPLOADED_FILENAME_FIELD = new ParseField("uploaded_filename"); + private static final ParseField COMPONENT_PREFIX_FIELD = new ParseField("component_prefix"); private static String indexName(Object[] fields) { return (String) fields[0]; @@ -525,22 +948,34 @@ private static String uploadedFilename(Object[] fields) { return (String) fields[2]; } + private static String componentPrefix(Object[] fields) { + return (String) fields[3]; + } + private static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>( "uploaded_index_metadata", - fields -> new UploadedIndexMetadata(indexName(fields), indexUUID(fields), uploadedFilename(fields)) + fields -> new UploadedIndexMetadata(indexName(fields), indexUUID(fields), uploadedFilename(fields), componentPrefix(fields)) ); static { PARSER.declareString(ConstructingObjectParser.constructorArg(), INDEX_NAME_FIELD); PARSER.declareString(ConstructingObjectParser.constructorArg(), INDEX_UUID_FIELD); PARSER.declareString(ConstructingObjectParser.constructorArg(), UPLOADED_FILENAME_FIELD); + PARSER.declareString(ConstructingObjectParser.constructorArg(), COMPONENT_PREFIX_FIELD); } + public static final String COMPONENT_PREFIX = "index--"; + private final String componentPrefix; private final String indexName; private final String indexUUID; private final String uploadedFilename; public UploadedIndexMetadata(String indexName, String indexUUID, String uploadedFileName) { + this( indexName,indexUUID,uploadedFileName, COMPONENT_PREFIX); + } + + public UploadedIndexMetadata(String indexName, String indexUUID, String uploadedFileName, String componentPrefix) { + this.componentPrefix = componentPrefix; this.indexName = indexName; this.indexUUID = indexUUID; this.uploadedFilename = uploadedFileName; @@ -550,15 +985,21 @@ public UploadedIndexMetadata(StreamInput in) throws IOException { this.indexName = in.readString(); this.indexUUID = in.readString(); this.uploadedFilename = in.readString(); + this.componentPrefix = in.readString(); } public String getUploadedFilePath() { return uploadedFilename; } + @Override + public String getComponent() { + return componentPrefix + getIndexName(); + } + public String getUploadedFilename() { - String[] splitPath = uploadedFilename.split("/"); - return splitPath[splitPath.length - 1]; + // todo see how this change affects existing users + return uploadedFilename; } public String getIndexName() { @@ -569,13 +1010,18 @@ public String getIndexUUID() { return indexUUID; } + public String getComponentPrefix() { + return componentPrefix; + } + @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { - return builder.startObject() + return builder .field(INDEX_NAME_FIELD.getPreferredName(), getIndexName()) .field(INDEX_UUID_FIELD.getPreferredName(), getIndexUUID()) .field(UPLOADED_FILENAME_FIELD.getPreferredName(), getUploadedFilePath()) - .endObject(); + .field(COMPONENT_PREFIX_FIELD.getPreferredName(), getComponentPrefix()); + } @Override @@ -583,6 +1029,7 @@ public void writeTo(StreamOutput out) throws IOException { out.writeString(indexName); out.writeString(indexUUID); out.writeString(uploadedFilename); + out.writeString(componentPrefix); } @Override @@ -596,12 +1043,14 @@ public boolean equals(Object o) { final UploadedIndexMetadata that = (UploadedIndexMetadata) o; return Objects.equals(indexName, that.indexName) && Objects.equals(indexUUID, that.indexUUID) - && Objects.equals(uploadedFilename, that.uploadedFilename); + && Objects.equals(uploadedFilename, that.uploadedFilename) + && Objects.equals(componentPrefix, that.componentPrefix); + } @Override public int hashCode() { - return Objects.hash(indexName, indexUUID, uploadedFilename); + return Objects.hash(indexName, indexUUID, uploadedFilename, componentPrefix); } @Override @@ -613,4 +1062,83 @@ public static UploadedIndexMetadata fromXContent(XContentParser parser) throws I return PARSER.parse(parser, null); } } + + /** + * Metadata for uploaded metadata attribute + * + * @opensearch.internal + */ + public static class UploadedMetadataAttribute implements UploadedMetadata, Writeable, ToXContentFragment { + private static final ParseField UPLOADED_FILENAME_FIELD = new ParseField("uploaded_filename"); + + private static final ObjectParser.NamedObjectParser PARSER; + + static { + ConstructingObjectParser innerParser = new ConstructingObjectParser<>( + "uploaded_metadata_attribute", + true, + (Object[] parsedObject, String name) -> { + String uploadedFilename = (String) parsedObject[0]; + return new UploadedMetadataAttribute(name, uploadedFilename); + } + ); + innerParser.declareString(ConstructingObjectParser.constructorArg(), UPLOADED_FILENAME_FIELD); + PARSER = ((p, c, name) -> innerParser.parse(p, name)); + } + + private final String attributeName; + private final String uploadedFilename; + + public UploadedMetadataAttribute(String attributeName, String uploadedFilename) { + this.attributeName = attributeName; + this.uploadedFilename = uploadedFilename; + } + + public UploadedMetadataAttribute(StreamInput in) throws IOException { + this.attributeName = in.readString(); + this.uploadedFilename = in.readString(); + } + + public String getAttributeName() { + return attributeName; + } + + @Override + public String getComponent() { + return getAttributeName(); + } + + public String getUploadedFilename() { + return uploadedFilename; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeString(attributeName); + out.writeString(uploadedFilename); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + return builder.startObject(getAttributeName()) + .field(UPLOADED_FILENAME_FIELD.getPreferredName(), getUploadedFilename()) + .endObject(); + } + + public static UploadedMetadataAttribute fromXContent(XContentParser parser) throws IOException { + return PARSER.parse(parser, null, parser.currentName()); + } + + @Override + public String toString() { + return "UploadedMetadataAttribute{" + + "attributeName='" + + attributeName + + '\'' + + ", uploadedFilename='" + + uploadedFilename + + '\'' + + '}'; + } + } } diff --git a/server/src/main/java/org/opensearch/gateway/remote/ClusterStateDiffManifest.java b/server/src/main/java/org/opensearch/gateway/remote/ClusterStateDiffManifest.java new file mode 100644 index 0000000000000..3e65c005a724c --- /dev/null +++ b/server/src/main/java/org/opensearch/gateway/remote/ClusterStateDiffManifest.java @@ -0,0 +1,494 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.gateway.remote; + +import org.apache.lucene.util.packed.DirectMonotonicReader; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.metadata.Metadata; +import org.opensearch.cluster.routing.IndexRoutingTable; +import org.opensearch.cluster.routing.RoutingTable; +import org.opensearch.core.common.Strings; +import org.opensearch.core.xcontent.MediaTypeRegistry; +import org.opensearch.core.xcontent.ToXContentObject; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.core.xcontent.XContentParseException; +import org.opensearch.core.xcontent.XContentParser; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static org.opensearch.core.xcontent.XContentParserUtils.ensureExpectedToken; +import static org.opensearch.core.xcontent.XContentParserUtils.parseStringList; + +public class ClusterStateDiffManifest implements ToXContentObject { + private static final String FROM_STATE_UUID_FIELD = "from_state_uuid"; + private static final String TO_STATE_UUID_FIELD = "to_state_uuid"; + private static final String METADATA_DIFF_FIELD = "metadata_diff"; + private static final String COORDINATION_METADATA_UPDATED_FIELD = "coordination_metadata_diff"; + private static final String SETTINGS_METADATA_UPDATED_FIELD = "settings_metadata_diff"; + private static final String TEMPLATES_METADATA_UPDATED_FIELD = "templates_metadata_diff"; + private static final String INDICES_DIFF_FIELD = "indices_diff"; + private static final String METADATA_CUSTOM_DIFF_FIELD = "metadata_custom_diff"; + private static final String UPSERTS_FIELD = "upserts"; + private static final String DELETES_FIELD = "deletes"; + private static final String CLUSTER_BLOCKS_UPDATED_FIELD = "cluster_blocks_diff"; + private static final String DISCOVERY_NODES_UPDATED_FIELD = "discovery_nodes_diff"; + private static final String ROUTING_TABLE_DIFF = "routing_table_diff"; + private final String fromStateUUID; + private final String toStateUUID; + private final boolean coordinationMetadataUpdated; + private final boolean settingsMetadataUpdated; + private final boolean templatesMetadataUpdated; + private List customMetadataUpdated; + private final List customMetadataDeleted; + private final List indicesUpdated; + private final List indicesDeleted; + private final boolean clusterBlocksUpdated; + private final boolean discoveryNodesUpdated; + private final List indicesRoutingUpdated; + private final List indicesRoutingDeleted; + + ClusterStateDiffManifest(ClusterState state, ClusterState previousState) { + fromStateUUID = previousState.stateUUID(); + toStateUUID = state.stateUUID(); + coordinationMetadataUpdated = !Metadata.isCoordinationMetadataEqual(state.metadata(), previousState.metadata()); + settingsMetadataUpdated = !Metadata.isSettingsMetadataEqual(state.metadata(), previousState.metadata()); + templatesMetadataUpdated = !Metadata.isTemplatesMetadataEqual(state.metadata(), previousState.metadata()); + indicesDeleted = findRemovedIndices(state.metadata().indices(), previousState.metadata().indices()); + indicesUpdated = findUpdatedIndices(state.metadata().indices(), previousState.metadata().indices()); + clusterBlocksUpdated = !state.blocks().equals(previousState.blocks()); + discoveryNodesUpdated = state.nodes().delta(previousState.nodes()).hasChanges(); + customMetadataUpdated = new ArrayList<>(); + for (String custom : state.metadata().customs().keySet()) { + if (!state.metadata().customs().get(custom).equals(previousState.metadata().customs().get(custom))) { + customMetadataUpdated.add(custom); + } + } + customMetadataDeleted = new ArrayList<>(); + for (String custom : previousState.metadata().customs().keySet()) { + if (state.metadata().customs().get(custom) == null) { + customMetadataDeleted.add(custom); + } + } + indicesRoutingUpdated = getIndicesRoutingUpdated(previousState.routingTable(), state.routingTable()); + indicesRoutingDeleted = getIndicesRoutingDeleted(previousState.routingTable(), state.routingTable()); + } + + public ClusterStateDiffManifest(String fromStateUUID, + String toStateUUID, + boolean coordinationMetadataUpdated, + boolean settingsMetadataUpdated, + boolean templatesMetadataUpdated, + List customMetadataUpdated, + List customMetadataDeleted, + List indicesUpdated, + List indicesDeleted, + boolean clusterBlocksUpdated, + boolean discoveryNodesUpdated, + ListindicesRoutingUpdated, + ListindicesRoutingDeleted) { + this.fromStateUUID = fromStateUUID; + this.toStateUUID = toStateUUID; + this.coordinationMetadataUpdated = coordinationMetadataUpdated; + this.settingsMetadataUpdated = settingsMetadataUpdated; + this.templatesMetadataUpdated = templatesMetadataUpdated; + this.customMetadataUpdated = customMetadataUpdated; + this.customMetadataDeleted = customMetadataDeleted; + this.indicesUpdated = indicesUpdated; + this.indicesDeleted = indicesDeleted; + this.clusterBlocksUpdated = clusterBlocksUpdated; + this.discoveryNodesUpdated = discoveryNodesUpdated; + this.indicesRoutingUpdated = indicesRoutingUpdated; + this.indicesRoutingDeleted = indicesRoutingDeleted; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + { + builder.field(FROM_STATE_UUID_FIELD, fromStateUUID); + builder.field(TO_STATE_UUID_FIELD, toStateUUID); + builder.startObject(METADATA_DIFF_FIELD); + { + builder.field(COORDINATION_METADATA_UPDATED_FIELD, coordinationMetadataUpdated); + builder.field(SETTINGS_METADATA_UPDATED_FIELD, settingsMetadataUpdated); + builder.field(TEMPLATES_METADATA_UPDATED_FIELD, templatesMetadataUpdated); + builder.startObject(INDICES_DIFF_FIELD); + builder.startArray(UPSERTS_FIELD); + for (String index : indicesUpdated) { + builder.value(index); + } + builder.endArray(); + builder.startArray(DELETES_FIELD); + for (String index : indicesDeleted) { + builder.value(index); + } + builder.endArray(); + builder.endObject(); + builder.startObject(METADATA_CUSTOM_DIFF_FIELD); + builder.startArray(UPSERTS_FIELD); + for (String custom : customMetadataUpdated) { + builder.value(custom); + } + builder.endArray(); + builder.startArray(DELETES_FIELD); + for (String custom : customMetadataDeleted) { + builder.value(custom); + } + builder.endArray(); + builder.endObject(); + } + builder.endObject(); + builder.field(CLUSTER_BLOCKS_UPDATED_FIELD, clusterBlocksUpdated); + builder.field(DISCOVERY_NODES_UPDATED_FIELD, discoveryNodesUpdated); + + builder.startObject(ROUTING_TABLE_DIFF); + builder.startArray(UPSERTS_FIELD); + for (String index : indicesRoutingUpdated) { + builder.value(index); + } + builder.endArray(); + builder.startArray(DELETES_FIELD); + for (String index : indicesRoutingDeleted) { + builder.value(index); + } + builder.endArray(); + builder.endObject(); + } + return builder; + } + + public static ClusterStateDiffManifest fromXContent(XContentParser parser) throws IOException { + Builder builder = new Builder(); + if (parser.currentToken() == null) { // fresh parser? move to next token + parser.nextToken(); + } + if (parser.currentToken() == XContentParser.Token.START_OBJECT) { + parser.nextToken(); + } + ensureExpectedToken(XContentParser.Token.FIELD_NAME, parser.currentToken(), parser); + String currentFieldName = parser.currentName(); + XContentParser.Token token; + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + currentFieldName = parser.currentName(); + } else if (token == XContentParser.Token.START_OBJECT) { + if (currentFieldName.equals(METADATA_DIFF_FIELD)) { + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + currentFieldName = parser.currentName(); + token = parser.nextToken(); + if (token.isValue()) { + switch (currentFieldName) { + case COORDINATION_METADATA_UPDATED_FIELD: + builder.coordinationMetadataUpdated(parser.booleanValue()); + break; + case SETTINGS_METADATA_UPDATED_FIELD: + builder.settingsMetadataUpdated(parser.booleanValue()); + break; + case TEMPLATES_METADATA_UPDATED_FIELD: + builder.templatesMetadataUpdated(parser.booleanValue()); + break; + default: + throw new XContentParseException("Unexpected field [" + currentFieldName + "]"); + } + } else if (token == XContentParser.Token.START_OBJECT) { + if (currentFieldName.equals(INDICES_DIFF_FIELD)) { + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + currentFieldName = parser.currentName(); + token = parser.nextToken(); + switch (currentFieldName) { + case UPSERTS_FIELD: + builder.indicesUpdated(parseStringList(parser)); + break; + case DELETES_FIELD: + builder.indicesDeleted(parseStringList(parser)); + break; + default: + throw new XContentParseException("Unexpected field [" + currentFieldName + "]"); + } + } + } else if (currentFieldName.equals(METADATA_CUSTOM_DIFF_FIELD)) { + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + currentFieldName = parser.currentName(); + token = parser.nextToken(); + switch (currentFieldName) { + case UPSERTS_FIELD: + builder.customMetadataUpdated(parseStringList(parser)); + break; + case DELETES_FIELD: + builder.customMetadataDeleted(parseStringList(parser)); + break; + default: + throw new XContentParseException("Unexpected field [" + currentFieldName + "]"); + } + } + } else { + throw new XContentParseException("Unexpected field [" + currentFieldName + "]"); + } + } else { + throw new XContentParseException("Unexpected token [" + token + "]"); + } + } + } else if (currentFieldName.equals(ROUTING_TABLE_DIFF)) { + while ((parser.nextToken()) != XContentParser.Token.END_OBJECT) { + currentFieldName = parser.currentName(); + parser.nextToken(); + switch (currentFieldName) { + case UPSERTS_FIELD: + builder.indicesRoutingUpdated(parseStringList(parser)); + break; + case DELETES_FIELD: + builder.indicesRoutingDeleted(parseStringList(parser)); + break; + default: + throw new XContentParseException("Unexpected field [" + currentFieldName + "]"); + } + } + } else { + throw new XContentParseException("Unexpected field [" + currentFieldName + "]"); + } + } else if (token.isValue()) { + switch (currentFieldName) { + case FROM_STATE_UUID_FIELD: + builder.fromStateUUID(parser.text()); + break; + case TO_STATE_UUID_FIELD: + builder.toStateUUID(parser.text()); + break; + case CLUSTER_BLOCKS_UPDATED_FIELD: + builder.clusterBlocksUpdated(parser.booleanValue()); + break; + case DISCOVERY_NODES_UPDATED_FIELD: + builder.discoveryNodesUpdated(parser.booleanValue()); + break; + default: + throw new XContentParseException("Unexpected field [" + currentFieldName + "]"); + } + } else { + throw new XContentParseException("Unexpected token [" + token + "]"); + } + } + return builder.build(); + } + + @Override + public String toString() { + return Strings.toString(MediaTypeRegistry.JSON, this); + } + + public List findRemovedIndices(Map indices, Map previousIndices) { + List removedIndices = new ArrayList<>(); + for (String index : previousIndices.keySet()) { + // index present in previous state but not in current + if (!indices.containsKey(index)) { + removedIndices.add(index); + } + } + return removedIndices; + } + + public List findUpdatedIndices(Map indices, Map previousIndices) { + List updatedIndices = new ArrayList<>(); + for (String index : indices.keySet()) { + if (!previousIndices.containsKey(index)) { + updatedIndices.add(index); + } else if (previousIndices.get(index).getVersion() != indices.get(index).getVersion()) { + updatedIndices.add(index); + } + } + return updatedIndices; + } + + public List getIndicesRoutingDeleted(RoutingTable previousRoutingTable, RoutingTable currentRoutingTable) { + List deletedIndicesRouting = new ArrayList<>(); + for(IndexRoutingTable previousIndexRouting: previousRoutingTable.getIndicesRouting().values()) { + if(!currentRoutingTable.getIndicesRouting().containsKey(previousIndexRouting.getIndex().getName())) { + // Latest Routing Table does not have entry for the index which means the index is deleted + deletedIndicesRouting.add(previousIndexRouting.getIndex().getName()); + } + } + return deletedIndicesRouting; + } + + public List getIndicesRoutingUpdated(RoutingTable previousRoutingTable, RoutingTable currentRoutingTable) { + List updatedIndicesRouting = new ArrayList<>(); + for(IndexRoutingTable currentIndicesRouting: currentRoutingTable.getIndicesRouting().values()) { + if(!previousRoutingTable.getIndicesRouting().containsKey(currentIndicesRouting.getIndex().getName())) { + // Latest Routing Table does not have entry for the index which means the index is created + updatedIndicesRouting.add(currentIndicesRouting.getIndex().getName()); + } else { + if(previousRoutingTable.getIndicesRouting().get(currentIndicesRouting.getIndex().getName()).equals(currentIndicesRouting)) { + // if the latest routing table has the same routing table as the previous routing table, then the index is not updated + continue; + } + updatedIndicesRouting.add(currentIndicesRouting.getIndex().getName()); + } + } + return updatedIndicesRouting; + } + + public String getFromStateUUID() { + return fromStateUUID; + } + + public String getToStateUUID() { + return toStateUUID; + } + + public boolean isCoordinationMetadataUpdated() { + return coordinationMetadataUpdated; + } + + public boolean isSettingsMetadataUpdated() { + return settingsMetadataUpdated; + } + + public boolean isTemplatesMetadataUpdated() { + return templatesMetadataUpdated; + } + + public List getCustomMetadataUpdated() { + return customMetadataUpdated; + } + + public List getCustomMetadataDeleted() { + return customMetadataDeleted; + } + + public List getIndicesUpdated() { + return indicesUpdated; + } + + public List getIndicesDeleted() { + return indicesDeleted; + } + + public boolean isClusterBlocksUpdated() { + return clusterBlocksUpdated; + } + + public boolean isDiscoveryNodesUpdated() { + return discoveryNodesUpdated; + } + + public List getIndicesRoutingUpdated() { + return indicesRoutingUpdated; + } + + public List getIndicesRoutingDeleted() { + return indicesRoutingDeleted; + } + + public static Builder builder() { + return new Builder(); + } + + public static class Builder { + private String fromStateUUID; + private String toStateUUID; + private boolean coordinationMetadataUpdated; + private boolean settingsMetadataUpdated; + private boolean templatesMetadataUpdated; + private List customMetadataUpdated; + private List customMetadataDeleted; + private List indicesUpdated; + private List indicesDeleted; + private boolean clusterBlocksUpdated; + private boolean discoveryNodesUpdated; + private List indicesRoutingUpdated; + private List indicesRoutingDeleted; + public Builder() {} + + public Builder fromStateUUID(String fromStateUUID) { + this.fromStateUUID = fromStateUUID; + return this; + } + + public Builder toStateUUID(String toStateUUID) { + this.toStateUUID = toStateUUID; + return this; + } + + public Builder coordinationMetadataUpdated(boolean coordinationMetadataUpdated) { + this.coordinationMetadataUpdated = coordinationMetadataUpdated; + return this; + } + + public Builder settingsMetadataUpdated(boolean settingsMetadataUpdated) { + this.settingsMetadataUpdated = settingsMetadataUpdated; + return this; + } + + public Builder templatesMetadataUpdated(boolean templatesMetadataUpdated) { + this.templatesMetadataUpdated = templatesMetadataUpdated; + return this; + } + + public Builder customMetadataUpdated(List customMetadataUpdated) { + this.customMetadataUpdated = customMetadataUpdated; + return this; + } + + public Builder customMetadataDeleted(List customMetadataDeleted) { + this.customMetadataDeleted = customMetadataDeleted; + return this; + } + + public Builder indicesUpdated(List indicesUpdated) { + this.indicesUpdated = indicesUpdated; + return this; + } + + public Builder indicesDeleted(List indicesDeleted) { + this.indicesDeleted = indicesDeleted; + return this; + } + + public Builder clusterBlocksUpdated(boolean clusterBlocksUpdated) { + this.clusterBlocksUpdated = clusterBlocksUpdated; + return this; + } + + public Builder discoveryNodesUpdated(boolean discoveryNodesUpdated) { + this.discoveryNodesUpdated = discoveryNodesUpdated; + return this; + } + + public Builder indicesRoutingUpdated(List indicesRoutingUpdated) { + this.indicesRoutingUpdated = indicesRoutingUpdated; + return this; + } + + public Builder indicesRoutingDeleted(List indicesRoutingDeleted) { + this.indicesRoutingDeleted = indicesRoutingDeleted; + return this; + } + + public ClusterStateDiffManifest build() { + return new ClusterStateDiffManifest( + fromStateUUID, + toStateUUID, + coordinationMetadataUpdated, + settingsMetadataUpdated, + templatesMetadataUpdated, + customMetadataUpdated, + customMetadataDeleted, + indicesUpdated, + indicesDeleted, + clusterBlocksUpdated, + discoveryNodesUpdated, + indicesRoutingUpdated, + indicesRoutingDeleted + ); + } + } +} diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterBlocks.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterBlocks.java new file mode 100644 index 0000000000000..8239ca76d3b6b --- /dev/null +++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterBlocks.java @@ -0,0 +1,109 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.gateway.remote; + +import static org.opensearch.gateway.remote.RemoteClusterStateAttributesManager.CLUSTER_STATE_ATTRIBUTES_CURRENT_CODEC_VERSION; +import static org.opensearch.gateway.remote.RemoteClusterStateUtils.DELIMITER; +import static org.opensearch.gateway.remote.RemoteClusterStateUtils.METADATA_NAME_FORMAT; + +import java.io.IOException; +import java.io.InputStream; +import java.util.List; +import org.opensearch.cluster.block.ClusterBlocks; +import org.opensearch.common.io.Streams; +import org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedMetadata; +import org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedMetadataAttribute; +import org.opensearch.index.remote.RemoteStoreUtils; +import org.opensearch.index.translog.transfer.BlobStoreTransferService; +import org.opensearch.repositories.blobstore.BlobStoreRepository; +import org.opensearch.repositories.blobstore.ChecksumBlobStoreFormat; +import org.opensearch.threadpool.ThreadPool; + +public class RemoteClusterBlocks extends AbstractRemoteBlobStoreObject { + + public static final String CLUSTER_BLOCKS = "blocks"; + public static final ChecksumBlobStoreFormat CLUSTER_BLOCKS_FORMAT = new ChecksumBlobStoreFormat<>( + "blocks", + METADATA_NAME_FORMAT, + ClusterBlocks::fromXContent + ); + + private ClusterBlocks clusterBlocks; + private long stateVersion; + private String blobName; + private final String clusterUUID; + + public RemoteClusterBlocks(ClusterBlocks clusterBlocks, long stateVersion, String clusterUUID, BlobStoreTransferService blobStoreTransferService, + BlobStoreRepository blobStoreRepository, String clusterName, + ThreadPool threadPool) { + super(blobStoreTransferService, blobStoreRepository, clusterName, threadPool); + this.clusterBlocks = clusterBlocks; + this.stateVersion = stateVersion; + this.clusterUUID = clusterUUID; + } + + public RemoteClusterBlocks(String blobName, String clusterUUID, BlobStoreTransferService blobStoreTransferService, BlobStoreRepository blobStoreRepository, + String clusterName, + ThreadPool threadPool) { + super(blobStoreTransferService, blobStoreRepository, clusterName, threadPool); + this.blobName = blobName; + this.clusterUUID = clusterUUID; + } + + @Override + public BlobPathParameters getBlobPathParameters() { + return new BlobPathParameters(List.of("transient"), CLUSTER_BLOCKS); + } + + @Override + public String getFullBlobName() { + return blobName; + } + + @Override + public String generateBlobFileName() { + // 123456789012_test-cluster/cluster-state/dsgYj10Nkso7/transient/______ + String blobFileName = String.join( + DELIMITER, + getBlobPathParameters().getFilePrefix(), + RemoteStoreUtils.invertLong(stateVersion), + RemoteStoreUtils.invertLong(System.currentTimeMillis()), + String.valueOf(CLUSTER_STATE_ATTRIBUTES_CURRENT_CODEC_VERSION) + ); + // setting the full blob path with name for future access + this.blobName = getBlobPathForUpload().buildAsString() + blobFileName; + return blobFileName; + } + + @Override + public UploadedMetadata getUploadedMetadata() { + assert blobName != null; + return new UploadedMetadataAttribute(CLUSTER_BLOCKS, blobName); + } + + @Override + public ClusterBlocks get() { + return clusterBlocks; + } + + @Override + public String clusterUUID() { + return clusterUUID; + } + + @Override + public InputStream serialize() throws IOException { + return CLUSTER_BLOCKS_FORMAT.serialize(clusterBlocks, generateBlobFileName(), getCompressor(), RemoteClusterStateUtils.FORMAT_PARAMS).streamInput(); + } + + @Override + public ClusterBlocks deserialize(InputStream inputStream) throws IOException { + return CLUSTER_BLOCKS_FORMAT.deserialize(blobName, getBlobStoreRepository().getNamedXContentRegistry(), Streams.readFully(inputStream)); + } +} diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateAttributesManager.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateAttributesManager.java new file mode 100644 index 0000000000000..5c7eb4cc25fee --- /dev/null +++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateAttributesManager.java @@ -0,0 +1,93 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.gateway.remote; + +import org.opensearch.action.LatchedActionListener; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.block.ClusterBlocks; +import org.opensearch.cluster.node.DiscoveryNodes; +import org.opensearch.common.CheckedRunnable; +import org.opensearch.core.action.ActionListener; +import org.opensearch.core.xcontent.ToXContent; +import org.opensearch.gateway.remote.RemoteClusterStateUtils.RemoteStateTransferException; +import org.opensearch.index.translog.transfer.BlobStoreTransferService; +import org.opensearch.repositories.blobstore.BlobStoreRepository; +import org.opensearch.threadpool.ThreadPool; + +import java.io.IOException; + +public class RemoteClusterStateAttributesManager { + public static final String CLUSTER_STATE_ATTRIBUTE = "cluster_state_attribute"; + public static final String DISCOVERY_NODES = "nodes"; + public static final String CLUSTER_BLOCKS = "blocks"; + public static final String CUSTOM_PREFIX = "custom"; + public static final int CLUSTER_STATE_ATTRIBUTES_CURRENT_CODEC_VERSION = 1; + private final BlobStoreTransferService blobStoreTransferService; + private final BlobStoreRepository blobStoreRepository; + private final ThreadPool threadPool; + private final String clusterName; + + RemoteClusterStateAttributesManager(BlobStoreTransferService blobStoreTransferService, BlobStoreRepository repository, ThreadPool threadPool, String clusterName) { + this.blobStoreTransferService = blobStoreTransferService; + this.blobStoreRepository = repository; + this.threadPool = threadPool; + this.clusterName = clusterName; + } + + /** + * Allows async upload of Cluster State Attribute components to remote + */ + CheckedRunnable getAsyncMetadataWriteAction( + ClusterState clusterState, + String component, + ToXContent componentData, + LatchedActionListener latchedActionListener + ) { + AbstractRemoteBlobStoreObject remoteObject = getRemoteObject(componentData, clusterState.version(), clusterState.metadata().clusterUUID()); + ActionListener completionListener = ActionListener.wrap( + resp -> latchedActionListener.onResponse( + remoteObject.getUploadedMetadata() + ), + ex -> latchedActionListener.onFailure(new RemoteClusterStateUtils.RemoteStateTransferException(component, ex)) + ); + return remoteObject.writeAsync(completionListener); + } + + private AbstractRemoteBlobStoreObject getRemoteObject(ToXContent componentData, long stateVersion, String clusterUUID) { + if (componentData instanceof DiscoveryNodes) { + return new RemoteDiscoveryNodes((DiscoveryNodes)componentData, stateVersion, clusterUUID, blobStoreTransferService, blobStoreRepository, clusterName, threadPool); + } else if (componentData instanceof ClusterBlocks) { + return new RemoteClusterBlocks((ClusterBlocks) componentData, stateVersion, clusterUUID, blobStoreTransferService, blobStoreRepository, clusterName, threadPool); + } else { + throw new RemoteStateTransferException("Remote object not found for "+ componentData.getClass()); + } + } + + public CheckedRunnable getAsyncMetadataReadAction( + String clusterUUID, + String component, + String uploadedFilename, + LatchedActionListener listener + ) { + AbstractRemoteBlobStoreObject remoteObject = getRemoteObject(component, uploadedFilename, clusterUUID); + ActionListener actionListener = ActionListener.wrap(response -> listener.onResponse(new RemoteReadResult((ToXContent) response, CLUSTER_STATE_ATTRIBUTE, component)), listener::onFailure); + return () -> remoteObject.readAsync(actionListener); + } + + private AbstractRemoteBlobStoreObject getRemoteObject(String component, String blobName, String clusterUUID) { + if (component.equals(RemoteDiscoveryNodes.DISCOVERY_NODES)) { + return new RemoteDiscoveryNodes(blobName, clusterUUID, blobStoreTransferService, blobStoreRepository, clusterName, threadPool); + } else if (component.equals(RemoteClusterBlocks.CLUSTER_BLOCKS)) { + return new RemoteClusterBlocks(blobName, clusterUUID, blobStoreTransferService, blobStoreRepository, clusterName, threadPool); + } else { + throw new RemoteStateTransferException("Remote object not found for "+ component); + } + } + +} diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateCleanupManager.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateCleanupManager.java new file mode 100644 index 0000000000000..090dae144baf6 --- /dev/null +++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateCleanupManager.java @@ -0,0 +1,392 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.gateway.remote; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.logging.log4j.message.ParameterizedMessage; +import org.apache.logging.log4j.util.Strings; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.service.ClusterApplierService; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.blobstore.BlobMetadata; +import org.opensearch.common.blobstore.BlobPath; +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Setting; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.common.util.concurrent.AbstractAsyncTask; +import org.opensearch.core.action.ActionListener; +import org.opensearch.index.translog.transfer.BlobStoreTransferService; +import org.opensearch.threadpool.ThreadPool; + +import java.io.Closeable; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Locale; +import java.util.Set; +import java.util.concurrent.atomic.AtomicBoolean; + +import static org.opensearch.gateway.remote.RemoteGlobalMetadataManager.GLOBAL_METADATA_FORMAT; +import static org.opensearch.gateway.remote.RemoteGlobalMetadataManager.GLOBAL_METADATA_PATH_TOKEN; +import static org.opensearch.gateway.remote.RemoteIndexMetadata.INDEX_METADATA_FORMAT; +import static org.opensearch.gateway.remote.RemoteIndexMetadata.INDEX_PATH_TOKEN; +import static org.opensearch.gateway.remote.RemoteManifestManager.MANIFEST_FILE_PREFIX; +import static org.opensearch.gateway.remote.RemoteManifestManager.MANIFEST_PATH_TOKEN; + +/** + * A Manager which provides APIs to clean up stale cluster state files and runs an async stale cleanup task + * + * @opensearch.internal + */ +public class RemoteClusterStateCleanupManager implements Closeable { + + public static final int RETAINED_MANIFESTS = 10; + public static final int SKIP_CLEANUP_STATE_CHANGES = 10; + public static final TimeValue CLUSTER_STATE_CLEANUP_INTERVAL_DEFAULT = TimeValue.timeValueMinutes(5); + public static final TimeValue CLUSTER_STATE_CLEANUP_INTERVAL_MINIMUM = TimeValue.MINUS_ONE; + + /** + * Setting to specify the interval to do run stale file cleanup job + * Min value -1 indicates that the stale file cleanup job should be disabled + */ + public static final Setting REMOTE_CLUSTER_STATE_CLEANUP_INTERVAL_SETTING = Setting.timeSetting( + "cluster.remote_store.state.cleanup_interval", + CLUSTER_STATE_CLEANUP_INTERVAL_DEFAULT, + CLUSTER_STATE_CLEANUP_INTERVAL_MINIMUM, + Setting.Property.NodeScope, + Setting.Property.Dynamic + ); + private static final Logger logger = LogManager.getLogger(RemoteClusterStateCleanupManager.class); + private final RemoteClusterStateService remoteClusterStateService; + private final RemotePersistenceStats remoteStateStats; + private BlobStoreTransferService blobStoreTransferService; + private TimeValue staleFileCleanupInterval; + private final AtomicBoolean deleteStaleMetadataRunning = new AtomicBoolean(false); + private AsyncStaleFileDeletion staleFileDeletionTask; + private long lastCleanupAttemptStateVersion; + private final ThreadPool threadpool; + private final ClusterApplierService clusterApplierService; + + public RemoteClusterStateCleanupManager(RemoteClusterStateService remoteClusterStateService, ClusterService clusterService) { + this.remoteClusterStateService = remoteClusterStateService; + this.remoteStateStats = remoteClusterStateService.getStats(); + ClusterSettings clusterSettings = clusterService.getClusterSettings(); + this.clusterApplierService = clusterService.getClusterApplierService(); + this.staleFileCleanupInterval = clusterSettings.get(REMOTE_CLUSTER_STATE_CLEANUP_INTERVAL_SETTING); + this.threadpool = remoteClusterStateService.getThreadpool(); + // initialize with 0, a cleanup will be done when this node is elected master node and version is incremented more than threshold + this.lastCleanupAttemptStateVersion = 0; + clusterSettings.addSettingsUpdateConsumer(REMOTE_CLUSTER_STATE_CLEANUP_INTERVAL_SETTING, this::updateCleanupInterval); + } + + void start() { + staleFileDeletionTask = new AsyncStaleFileDeletion(this); + } + + @Override + public void close() throws IOException { + if (staleFileDeletionTask != null) { + staleFileDeletionTask.close(); + } + } + + private BlobStoreTransferService getBlobStoreTransferService() { + if (blobStoreTransferService == null) { + blobStoreTransferService = new BlobStoreTransferService(remoteClusterStateService.getBlobStore(), threadpool); + } + return blobStoreTransferService; + } + + private void updateCleanupInterval(TimeValue updatedInterval) { + this.staleFileCleanupInterval = updatedInterval; + logger.info("updated remote state cleanup interval to {}", updatedInterval); + // After updating the interval, we need to close the current task and create a new one which will run with updated interval + if (staleFileDeletionTask != null && !staleFileDeletionTask.getInterval().equals(updatedInterval)) { + staleFileDeletionTask.setInterval(updatedInterval); + } + } + + // visible for testing + void cleanUpStaleFiles() { + ClusterState currentAppliedState = clusterApplierService.state(); + if (currentAppliedState.nodes().isLocalNodeElectedClusterManager()) { + long cleanUpAttemptStateVersion = currentAppliedState.version(); + assert Strings.isNotEmpty(currentAppliedState.getClusterName().value()) : "cluster name is not set"; + assert Strings.isNotEmpty(currentAppliedState.metadata().clusterUUID()) : "cluster uuid is not set"; + if (cleanUpAttemptStateVersion - lastCleanupAttemptStateVersion > SKIP_CLEANUP_STATE_CHANGES) { + logger.info( + "Cleaning up stale remote state files for cluster [{}] with uuid [{}]. Last clean was done before {} updates", + currentAppliedState.getClusterName().value(), + currentAppliedState.metadata().clusterUUID(), + cleanUpAttemptStateVersion - lastCleanupAttemptStateVersion + ); + this.deleteStaleClusterMetadata( + currentAppliedState.getClusterName().value(), + currentAppliedState.metadata().clusterUUID(), + RETAINED_MANIFESTS + ); + lastCleanupAttemptStateVersion = cleanUpAttemptStateVersion; + } else { + logger.debug( + "Skipping cleanup of stale remote state files for cluster [{}] with uuid [{}]. Last clean was done before {} updates, which is less than threshold {}", + currentAppliedState.getClusterName().value(), + currentAppliedState.metadata().clusterUUID(), + cleanUpAttemptStateVersion - lastCleanupAttemptStateVersion, + SKIP_CLEANUP_STATE_CHANGES + ); + } + } else { + logger.debug("Skipping cleanup task as local node is not elected Cluster Manager"); + } + } + + private void addStaleGlobalMetadataPath(String fileName, Set filesToKeep, Set staleGlobalMetadataPaths) { + if (!filesToKeep.contains(fileName)) { + String[] splitPath = fileName.split("/"); + staleGlobalMetadataPaths.add( + new BlobPath().add(GLOBAL_METADATA_PATH_TOKEN).buildAsString() + GLOBAL_METADATA_FORMAT.blobName( + splitPath[splitPath.length - 1] + ) + ); + } + } + + // visible for testing + void deleteClusterMetadata( + String clusterName, + String clusterUUID, + List activeManifestBlobMetadata, + List staleManifestBlobMetadata + ) { + try { + Set filesToKeep = new HashSet<>(); + Set staleManifestPaths = new HashSet<>(); + Set staleIndexMetadataPaths = new HashSet<>(); + Set staleGlobalMetadataPaths = new HashSet<>(); + activeManifestBlobMetadata.forEach(blobMetadata -> { + ClusterMetadataManifest clusterMetadataManifest = remoteClusterStateService.getRemoteManifestManager().fetchRemoteClusterMetadataManifest( + clusterName, + clusterUUID, + blobMetadata.name() + ); + clusterMetadataManifest.getIndices() + .forEach(uploadedIndexMetadata -> filesToKeep.add(uploadedIndexMetadata.getUploadedFilename())); + if (clusterMetadataManifest.getCodecVersion() == ClusterMetadataManifest.CODEC_V1) { + filesToKeep.add(clusterMetadataManifest.getGlobalMetadataFileName()); + } else if (clusterMetadataManifest.getCodecVersion() >= ClusterMetadataManifest.CODEC_V2) { + filesToKeep.add(clusterMetadataManifest.getCoordinationMetadata().getUploadedFilename()); + filesToKeep.add(clusterMetadataManifest.getSettingsMetadata().getUploadedFilename()); + filesToKeep.add(clusterMetadataManifest.getTemplatesMetadata().getUploadedFilename()); + clusterMetadataManifest.getCustomMetadataMap().values().forEach(attribute -> filesToKeep.add(attribute.getUploadedFilename())); + } + }); + staleManifestBlobMetadata.forEach(blobMetadata -> { + ClusterMetadataManifest clusterMetadataManifest = remoteClusterStateService.getRemoteManifestManager().fetchRemoteClusterMetadataManifest( + clusterName, + clusterUUID, + blobMetadata.name() + ); + staleManifestPaths.add(new BlobPath().add(MANIFEST_PATH_TOKEN).buildAsString() + blobMetadata.name()); + if (clusterMetadataManifest.getCodecVersion() == ClusterMetadataManifest.CODEC_V1) { + addStaleGlobalMetadataPath(clusterMetadataManifest.getGlobalMetadataFileName(), filesToKeep, staleGlobalMetadataPaths); + } else if (clusterMetadataManifest.getCodecVersion() >= ClusterMetadataManifest.CODEC_V2) { + addStaleGlobalMetadataPath(clusterMetadataManifest.getCoordinationMetadata().getUploadedFilename(), filesToKeep, staleGlobalMetadataPaths); + addStaleGlobalMetadataPath(clusterMetadataManifest.getSettingsMetadata().getUploadedFilename(), filesToKeep, staleGlobalMetadataPaths); + addStaleGlobalMetadataPath(clusterMetadataManifest.getTemplatesMetadata().getUploadedFilename(), filesToKeep, staleGlobalMetadataPaths); + clusterMetadataManifest.getCustomMetadataMap().values().forEach(attribute -> { + addStaleGlobalMetadataPath(attribute.getUploadedFilename(), filesToKeep, staleGlobalMetadataPaths); + }); + } + + clusterMetadataManifest.getIndices().forEach(uploadedIndexMetadata -> { + if (filesToKeep.contains(uploadedIndexMetadata.getUploadedFilename()) == false) { + staleIndexMetadataPaths.add( + new BlobPath().add(INDEX_PATH_TOKEN).add(uploadedIndexMetadata.getIndexUUID()).buildAsString() + + INDEX_METADATA_FORMAT.blobName(uploadedIndexMetadata.getUploadedFilename()) + ); + } + }); + }); + + if (staleManifestPaths.isEmpty()) { + logger.debug("No stale Remote Cluster Metadata files found"); + return; + } + + deleteStalePaths(clusterName, clusterUUID, new ArrayList<>(staleGlobalMetadataPaths)); + deleteStalePaths(clusterName, clusterUUID, new ArrayList<>(staleIndexMetadataPaths)); + deleteStalePaths(clusterName, clusterUUID, new ArrayList<>(staleManifestPaths)); + } catch (IllegalStateException e) { + logger.error("Error while fetching Remote Cluster Metadata manifests", e); + } catch (IOException e) { + logger.error("Error while deleting stale Remote Cluster Metadata files", e); + remoteStateStats.cleanUpAttemptFailed(); + } catch (Exception e) { + logger.error("Unexpected error while deleting stale Remote Cluster Metadata files", e); + remoteStateStats.cleanUpAttemptFailed(); + } + } + + /** + * Deletes older than last {@code versionsToRetain} manifests. Also cleans up unreferenced IndexMetadata associated with older manifests + * + * @param clusterName name of the cluster + * @param clusterUUID uuid of cluster state to refer to in remote + * @param manifestsToRetain no of latest manifest files to keep in remote + */ + // package private for testing + void deleteStaleClusterMetadata(String clusterName, String clusterUUID, int manifestsToRetain) { + if (deleteStaleMetadataRunning.compareAndSet(false, true) == false) { + logger.info("Delete stale cluster metadata task is already in progress."); + return; + } + try { + getBlobStoreTransferService().listAllInSortedOrderAsync( + ThreadPool.Names.REMOTE_PURGE, + remoteClusterStateService.getRemoteManifestManager().getManifestFolderPath(clusterName, clusterUUID), + MANIFEST_FILE_PREFIX, + Integer.MAX_VALUE, + new ActionListener<>() { + @Override + public void onResponse(List blobMetadata) { + if (blobMetadata.size() > manifestsToRetain) { + deleteClusterMetadata( + clusterName, + clusterUUID, + blobMetadata.subList(0, manifestsToRetain), + blobMetadata.subList(manifestsToRetain, blobMetadata.size()) + ); + } + deleteStaleMetadataRunning.set(false); + } + + @Override + public void onFailure(Exception e) { + logger.error( + new ParameterizedMessage( + "Exception occurred while deleting Remote Cluster Metadata for clusterUUIDs {}", + clusterUUID + ) + ); + deleteStaleMetadataRunning.set(false); + } + } + ); + } catch (Exception e) { + deleteStaleMetadataRunning.set(false); + throw e; + } + } + + /** + * Purges all remote cluster state against provided cluster UUIDs + * + * @param clusterName name of the cluster + * @param clusterUUIDs clusteUUIDs for which the remote state needs to be purged + */ + void deleteStaleUUIDsClusterMetadata(String clusterName, List clusterUUIDs) { + clusterUUIDs.forEach( + clusterUUID -> getBlobStoreTransferService().deleteAsync( + ThreadPool.Names.REMOTE_PURGE, + RemoteClusterStateUtils.getCusterMetadataBasePath(remoteClusterStateService.getBlobStoreRepository(), clusterName, clusterUUID), + new ActionListener<>() { + @Override + public void onResponse(Void unused) { + logger.info("Deleted all remote cluster metadata for cluster UUID - {}", clusterUUID); + } + + @Override + public void onFailure(Exception e) { + logger.error( + new ParameterizedMessage( + "Exception occurred while deleting all remote cluster metadata for cluster UUID {}", + clusterUUID + ), + e + ); + remoteStateStats.cleanUpAttemptFailed(); + } + } + ) + ); + } + + // package private for testing + void deleteStalePaths(String clusterName, String clusterUUID, List stalePaths) throws IOException { + logger.debug(String.format(Locale.ROOT, "Deleting stale files from remote - %s", stalePaths)); + getBlobStoreTransferService().deleteBlobs( + RemoteClusterStateUtils.getCusterMetadataBasePath(remoteClusterStateService.getBlobStoreRepository(), clusterName, clusterUUID), + stalePaths + ); + } + + /** + * Purges all remote cluster state against provided cluster UUIDs + * @param clusterState current state of the cluster + * @param committedManifest last committed ClusterMetadataManifest + */ + public void deleteStaleClusterUUIDs(ClusterState clusterState, ClusterMetadataManifest committedManifest) { + threadpool.executor(ThreadPool.Names.REMOTE_PURGE).execute(() -> { + String clusterName = clusterState.getClusterName().value(); + logger.debug("Deleting stale cluster UUIDs data from remote [{}]", clusterName); + Set allClustersUUIDsInRemote; + try { + allClustersUUIDsInRemote = new HashSet<>( + remoteClusterStateService.getAllClusterUUIDs(clusterState.getClusterName().value()) + ); + } catch (IOException e) { + logger.info(String.format(Locale.ROOT, "Error while fetching all cluster UUIDs for [%s]", clusterName)); + return; + } + // Retain last 2 cluster uuids data + allClustersUUIDsInRemote.remove(committedManifest.getClusterUUID()); + allClustersUUIDsInRemote.remove(committedManifest.getPreviousClusterUUID()); + deleteStaleUUIDsClusterMetadata(clusterName, new ArrayList<>(allClustersUUIDsInRemote)); + }); + } + + public TimeValue getStaleFileCleanupInterval() { + return this.staleFileCleanupInterval; + } + + AsyncStaleFileDeletion getStaleFileDeletionTask() { // for testing + return this.staleFileDeletionTask; + } + + RemotePersistenceStats getStats() { + return this.remoteStateStats; + } + + static final class AsyncStaleFileDeletion extends AbstractAsyncTask { + private final RemoteClusterStateCleanupManager remoteClusterStateCleanupManager; + + AsyncStaleFileDeletion(RemoteClusterStateCleanupManager remoteClusterStateCleanupManager) { + super( + logger, + remoteClusterStateCleanupManager.threadpool, + remoteClusterStateCleanupManager.getStaleFileCleanupInterval(), + true + ); + this.remoteClusterStateCleanupManager = remoteClusterStateCleanupManager; + rescheduleIfNecessary(); + } + + @Override + protected boolean mustReschedule() { + return true; + } + + @Override + protected void runInternal() { + remoteClusterStateCleanupManager.cleanUpStaleFiles(); + } + } +} diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java index c892b475d71da..4a06d7928e91b 100644 --- a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java +++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java @@ -8,18 +8,61 @@ package org.opensearch.gateway.remote; +import static org.opensearch.gateway.PersistedClusterStateService.SLOW_WRITE_LOGGING_THRESHOLD; +import static org.opensearch.gateway.remote.RemoteClusterStateAttributesManager.CLUSTER_BLOCKS; +import static org.opensearch.gateway.remote.RemoteClusterStateAttributesManager.CLUSTER_STATE_ATTRIBUTE; +import static org.opensearch.gateway.remote.RemoteClusterStateAttributesManager.DISCOVERY_NODES; +import static org.opensearch.gateway.remote.RemoteClusterStateUtils.DELIMITER; + +import static org.opensearch.gateway.remote.RemoteClusterStateUtils.UploadedMetadataResults; +import static org.opensearch.gateway.remote.RemoteClusterStateUtils.clusterUUIDContainer; +import static org.opensearch.gateway.remote.RemoteGlobalMetadataManager.COORDINATION_METADATA; +import static org.opensearch.gateway.remote.RemoteGlobalMetadataManager.CUSTOM_DELIMITER; +import static org.opensearch.gateway.remote.RemoteGlobalMetadataManager.CUSTOM_METADATA; +import static org.opensearch.gateway.remote.RemoteGlobalMetadataManager.SETTING_METADATA; +import static org.opensearch.gateway.remote.RemoteGlobalMetadataManager.TEMPLATES_METADATA; +import static org.opensearch.gateway.remote.RemoteIndexMetadata.INDEX_PATH_TOKEN; +import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.isRemoteRoutingTableEnabled; +import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.isRemoteStoreClusterStateEnabled; + +import java.io.Closeable; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicReference; +import java.util.function.Function; +import java.util.function.LongSupplier; +import java.util.function.Supplier; +import java.util.stream.Collectors; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.message.ParameterizedMessage; -import org.opensearch.Version; import org.opensearch.action.LatchedActionListener; +import org.opensearch.cluster.ClusterName; import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.block.ClusterBlocks; +import org.opensearch.cluster.coordination.CoordinationMetadata; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.metadata.Metadata; +import org.opensearch.cluster.metadata.TemplatesMetadata; +import org.opensearch.cluster.node.DiscoveryNodes; +import org.opensearch.cluster.routing.IndexRoutingTable; +import org.opensearch.cluster.routing.remote.RemoteRoutingTableService; +import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.routing.RoutingTable; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.CheckedRunnable; import org.opensearch.common.Nullable; import org.opensearch.common.blobstore.BlobContainer; -import org.opensearch.common.blobstore.BlobMetadata; -import org.opensearch.common.blobstore.BlobPath; +import org.opensearch.common.blobstore.BlobStore; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Setting.Property; @@ -30,113 +73,25 @@ import org.opensearch.core.index.Index; import org.opensearch.core.xcontent.ToXContent; import org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedIndexMetadata; -import org.opensearch.index.remote.RemoteStoreUtils; +import org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedMetadataAttribute; import org.opensearch.index.translog.transfer.BlobStoreTransferService; import org.opensearch.node.Node; import org.opensearch.node.remotestore.RemoteStoreNodeAttribute; import org.opensearch.repositories.RepositoriesService; import org.opensearch.repositories.Repository; import org.opensearch.repositories.blobstore.BlobStoreRepository; -import org.opensearch.repositories.blobstore.ChecksumBlobStoreFormat; import org.opensearch.threadpool.ThreadPool; -import java.io.Closeable; -import java.io.IOException; -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.Base64; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Locale; -import java.util.Map; -import java.util.Objects; -import java.util.Optional; -import java.util.Set; -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicReference; -import java.util.function.Function; -import java.util.function.LongSupplier; -import java.util.function.Supplier; -import java.util.stream.Collectors; - -import static org.opensearch.gateway.PersistedClusterStateService.SLOW_WRITE_LOGGING_THRESHOLD; -import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.isRemoteStoreClusterStateEnabled; - /** * A Service which provides APIs to upload and download cluster metadata from remote store. * * @opensearch.internal */ public class RemoteClusterStateService implements Closeable { - - public static final String METADATA_NAME_FORMAT = "%s.dat"; - - public static final String METADATA_MANIFEST_NAME_FORMAT = "%s"; - public static final int RETAINED_MANIFESTS = 10; - public static final String DELIMITER = "__"; - private static final Logger logger = LogManager.getLogger(RemoteClusterStateService.class); - public static final TimeValue INDEX_METADATA_UPLOAD_TIMEOUT_DEFAULT = TimeValue.timeValueMillis(20000); - - public static final TimeValue GLOBAL_METADATA_UPLOAD_TIMEOUT_DEFAULT = TimeValue.timeValueMillis(20000); - - public static final TimeValue METADATA_MANIFEST_UPLOAD_TIMEOUT_DEFAULT = TimeValue.timeValueMillis(20000); - - public static final Setting INDEX_METADATA_UPLOAD_TIMEOUT_SETTING = Setting.timeSetting( - "cluster.remote_store.state.index_metadata.upload_timeout", - INDEX_METADATA_UPLOAD_TIMEOUT_DEFAULT, - Setting.Property.Dynamic, - Setting.Property.NodeScope - ); - - public static final Setting GLOBAL_METADATA_UPLOAD_TIMEOUT_SETTING = Setting.timeSetting( - "cluster.remote_store.state.global_metadata.upload_timeout", - GLOBAL_METADATA_UPLOAD_TIMEOUT_DEFAULT, - Setting.Property.Dynamic, - Setting.Property.NodeScope - ); - - public static final Setting METADATA_MANIFEST_UPLOAD_TIMEOUT_SETTING = Setting.timeSetting( - "cluster.remote_store.state.metadata_manifest.upload_timeout", - METADATA_MANIFEST_UPLOAD_TIMEOUT_DEFAULT, - Setting.Property.Dynamic, - Setting.Property.NodeScope - ); - - public static final ChecksumBlobStoreFormat INDEX_METADATA_FORMAT = new ChecksumBlobStoreFormat<>( - "index-metadata", - METADATA_NAME_FORMAT, - IndexMetadata::fromXContent - ); - - public static final ChecksumBlobStoreFormat GLOBAL_METADATA_FORMAT = new ChecksumBlobStoreFormat<>( - "metadata", - METADATA_NAME_FORMAT, - Metadata::fromXContent - ); - - /** - * Manifest format compatible with older codec v0, where codec version was missing. - */ - public static final ChecksumBlobStoreFormat CLUSTER_METADATA_MANIFEST_FORMAT_V0 = - new ChecksumBlobStoreFormat<>("cluster-metadata-manifest", METADATA_MANIFEST_NAME_FORMAT, ClusterMetadataManifest::fromXContentV0); - - /** - * Manifest format compatible with codec v1, where we introduced codec versions/global metadata. - */ - public static final ChecksumBlobStoreFormat CLUSTER_METADATA_MANIFEST_FORMAT = new ChecksumBlobStoreFormat<>( - "cluster-metadata-manifest", - METADATA_MANIFEST_NAME_FORMAT, - ClusterMetadataManifest::fromXContent - ); - /** * Used to specify if cluster state metadata should be published to remote store */ @@ -147,72 +102,89 @@ public class RemoteClusterStateService implements Closeable { Property.Final ); - public static final String CLUSTER_STATE_PATH_TOKEN = "cluster-state"; - public static final String INDEX_PATH_TOKEN = "index"; - public static final String GLOBAL_METADATA_PATH_TOKEN = "global-metadata"; - public static final String MANIFEST_PATH_TOKEN = "manifest"; - public static final String MANIFEST_FILE_PREFIX = "manifest"; - public static final String METADATA_FILE_PREFIX = "metadata"; - public static final int SPLITED_MANIFEST_FILE_LENGTH = 6; // file name manifest__term__version__C/P__timestamp__codecversion + public static final TimeValue REMOTE_STATE_READ_TIMEOUT_DEFAULT = TimeValue.timeValueMillis(20000); + + public static final Setting REMOTE_STATE_READ_TIMEOUT_SETTING = Setting.timeSetting( + "cluster.remote_store.state.read_timeout", + REMOTE_STATE_READ_TIMEOUT_DEFAULT, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + private TimeValue remoteStateReadTimeout; private final String nodeId; private final Supplier repositoriesService; private final Settings settings; private final LongSupplier relativeTimeNanosSupplier; private final ThreadPool threadpool; + private final List indexMetadataUploadListeners; private BlobStoreRepository blobStoreRepository; - private BlobStoreTransferService blobStoreTransferService; + private RemoteRoutingTableService remoteRoutingTableService; private volatile TimeValue slowWriteLoggingThreshold; - private volatile TimeValue indexMetadataUploadTimeout; - private volatile TimeValue globalMetadataUploadTimeout; - private volatile TimeValue metadataManifestUploadTimeout; - - private final AtomicBoolean deleteStaleMetadataRunning = new AtomicBoolean(false); private final RemotePersistenceStats remoteStateStats; + private RemoteClusterStateCleanupManager remoteClusterStateCleanupManager; + private RemoteIndexMetadataManager remoteIndexMetadataManager; + private RemoteGlobalMetadataManager remoteGlobalMetadataManager; + private RemoteClusterStateAttributesManager remoteClusterStateAttributesManager; + private RemoteManifestManager remoteManifestManager; + private ClusterSettings clusterSettings; + private BlobStoreTransferService blobStoreTransferService; + private final String CLUSTER_STATE_UPLOAD_TIME_LOG_STRING = "writing cluster state for version [{}] took [{}ms]"; + private final String METADATA_UPDATE_LOG_STRING = "wrote metadata for [{}] indices and skipped [{}] unchanged " + + "indices, coordination metadata updated : [{}], settings metadata updated : [{}], templates metadata " + + "updated : [{}], custom metadata updated : [{}]"; public static final int INDEX_METADATA_CURRENT_CODEC_VERSION = 1; - public static final int MANIFEST_CURRENT_CODEC_VERSION = ClusterMetadataManifest.CODEC_V1; - public static final int GLOBAL_METADATA_CURRENT_CODEC_VERSION = 1; // ToXContent Params with gateway mode. // We are using gateway context mode to persist all custom metadata. public static final ToXContent.Params FORMAT_PARAMS; + static { Map params = new HashMap<>(1); params.put(Metadata.CONTEXT_MODE_PARAM, Metadata.CONTEXT_MODE_GATEWAY); FORMAT_PARAMS = new ToXContent.MapParams(params); } + private String latestClusterName; + private String latestClusterUUID; + private long lastCleanupAttemptState; + private boolean isClusterManagerNode; public RemoteClusterStateService( String nodeId, Supplier repositoriesService, Settings settings, - ClusterSettings clusterSettings, + ClusterService clusterService, LongSupplier relativeTimeNanosSupplier, - ThreadPool threadPool + ThreadPool threadPool, + List indexMetadataUploadListeners ) { assert isRemoteStoreClusterStateEnabled(settings) : "Remote cluster state is not enabled"; + logger.info("REMOTE STATE ENABLED"); + this.nodeId = nodeId; this.repositoriesService = repositoriesService; this.settings = settings; this.relativeTimeNanosSupplier = relativeTimeNanosSupplier; this.threadpool = threadPool; + clusterSettings = clusterService.getClusterSettings(); this.slowWriteLoggingThreshold = clusterSettings.get(SLOW_WRITE_LOGGING_THRESHOLD); - this.indexMetadataUploadTimeout = clusterSettings.get(INDEX_METADATA_UPLOAD_TIMEOUT_SETTING); - this.globalMetadataUploadTimeout = clusterSettings.get(GLOBAL_METADATA_UPLOAD_TIMEOUT_SETTING); - this.metadataManifestUploadTimeout = clusterSettings.get(METADATA_MANIFEST_UPLOAD_TIMEOUT_SETTING); clusterSettings.addSettingsUpdateConsumer(SLOW_WRITE_LOGGING_THRESHOLD, this::setSlowWriteLoggingThreshold); - clusterSettings.addSettingsUpdateConsumer(INDEX_METADATA_UPLOAD_TIMEOUT_SETTING, this::setIndexMetadataUploadTimeout); - clusterSettings.addSettingsUpdateConsumer(GLOBAL_METADATA_UPLOAD_TIMEOUT_SETTING, this::setGlobalMetadataUploadTimeout); - clusterSettings.addSettingsUpdateConsumer(METADATA_MANIFEST_UPLOAD_TIMEOUT_SETTING, this::setMetadataManifestUploadTimeout); + this.remoteStateReadTimeout = clusterSettings.get(REMOTE_STATE_READ_TIMEOUT_SETTING); + clusterSettings.addSettingsUpdateConsumer(REMOTE_STATE_READ_TIMEOUT_SETTING, this::setRemoteClusterStateEnabled); this.remoteStateStats = new RemotePersistenceStats(); - } - private BlobStoreTransferService getBlobStoreTransferService() { - if (blobStoreTransferService == null) { - blobStoreTransferService = new BlobStoreTransferService(blobStoreRepository.blobStore(), threadpool); + if(isRemoteRoutingTableEnabled(settings)) { + this.remoteRoutingTableService = new RemoteRoutingTableService(repositoriesService, + settings, threadPool); + logger.info("REMOTE ROUTING ENABLED"); + } else { + logger.info("REMOTE ROUTING DISABLED"); } - return blobStoreTransferService; + this.lastCleanupAttemptState = 0; + this.isClusterManagerNode = DiscoveryNode.isClusterManagerNode(settings); + this.remoteClusterStateCleanupManager = new RemoteClusterStateCleanupManager(this, clusterService); + this.indexMetadataUploadListeners = indexMetadataUploadListeners; } /** @@ -223,28 +195,41 @@ private BlobStoreTransferService getBlobStoreTransferService() { */ @Nullable public ClusterMetadataManifest writeFullMetadata(ClusterState clusterState, String previousClusterUUID) throws IOException { + logger.info("WRITING FULL STATE"); final long startTimeNanos = relativeTimeNanosSupplier.getAsLong(); if (clusterState.nodes().isLocalNodeElectedClusterManager() == false) { logger.error("Local node is not elected cluster manager. Exiting"); return null; } - // TODO: we can upload global metadata and index metadata in parallel. [issue: #10645] - // Write globalMetadata - String globalMetadataFile = writeGlobalMetadata(clusterState); - - // any validations before/after upload ? - final List allUploadedIndexMetadata = writeIndexMetadataParallel( + UploadedMetadataResults uploadedMetadataResults = writeMetadataInParallel( clusterState, - new ArrayList<>(clusterState.metadata().indices().values()) - ); - final ClusterMetadataManifest manifest = uploadManifest( + new ArrayList<>(clusterState.metadata().indices().values()), + Collections.emptyMap(), + clusterState.metadata().customs(), + true, + true, + true, + true, + true, + new ArrayList<>(clusterState.getRoutingTable().indicesRouting().values())); + + final ClusterMetadataManifest manifest = remoteManifestManager.uploadManifest( clusterState, - allUploadedIndexMetadata, + uploadedMetadataResults.uploadedIndexMetadata, previousClusterUUID, - globalMetadataFile, + uploadedMetadataResults.uploadedCoordinationMetadata, + uploadedMetadataResults.uploadedSettingsMetadata, + uploadedMetadataResults.uploadedTemplatesMetadata, + uploadedMetadataResults.uploadedCustomMetadataMap, + uploadedMetadataResults.uploadedDiscoveryNodes, + uploadedMetadataResults.uploadedClusterBlocks, + new ClusterStateDiffManifest(clusterState, ClusterState.EMPTY_STATE), + uploadedMetadataResults.uploadedIndicesRoutingMetadata, false ); + + logger.info("MANIFEST IN FULL STATE {}", manifest); final long durationMillis = TimeValue.nsecToMSec(relativeTimeNanosSupplier.getAsLong() - startTimeNanos); remoteStateStats.stateSucceeded(); remoteStateStats.stateTook(durationMillis); @@ -253,13 +238,13 @@ public ClusterMetadataManifest writeFullMetadata(ClusterState clusterState, Stri "writing cluster state took [{}ms] which is above the warn threshold of [{}]; " + "wrote full state with [{}] indices", durationMillis, slowWriteLoggingThreshold, - allUploadedIndexMetadata.size() + uploadedMetadataResults.uploadedIndexMetadata.size() ); } else { logger.info( "writing cluster state took [{}ms]; " + "wrote full state with [{}] indices and global metadata", durationMillis, - allUploadedIndexMetadata.size() + uploadedMetadataResults.uploadedIndexMetadata.size() ); } return manifest; @@ -278,6 +263,8 @@ public ClusterMetadataManifest writeIncrementalMetadata( ClusterState clusterState, ClusterMetadataManifest previousManifest ) throws IOException { + logger.info("WRITING INCREMENTAL STATE"); + final long startTimeNanos = relativeTimeNanosSupplier.getAsLong(); if (clusterState.nodes().isLocalNodeElectedClusterManager() == false) { logger.error("Local node is not elected cluster manager. Exiting"); @@ -285,27 +272,14 @@ public ClusterMetadataManifest writeIncrementalMetadata( } assert previousClusterState.metadata().coordinationMetadata().term() == clusterState.metadata().coordinationMetadata().term(); - // Write Global Metadata - final boolean updateGlobalMetadata = Metadata.isGlobalStateEquals( - previousClusterState.metadata(), - clusterState.metadata() - ) == false; - String globalMetadataFile; - // For migration case from codec V0 to V1, we have added null check on global metadata file, - // If file is empty and codec is 1 then write global metadata. - if (updateGlobalMetadata || previousManifest.getGlobalMetadataFileName() == null) { - globalMetadataFile = writeGlobalMetadata(clusterState); - } else { - logger.debug("Global metadata has not updated in cluster state, skipping upload of it"); - globalMetadataFile = previousManifest.getGlobalMetadataFileName(); - } - - // Write Index Metadata - final Map previousStateIndexMetadataVersionByName = new HashMap<>(); - for (final IndexMetadata indexMetadata : previousClusterState.metadata().indices().values()) { - previousStateIndexMetadataVersionByName.put(indexMetadata.getIndex().getName(), indexMetadata.getVersion()); + final Map customsToBeDeletedFromRemote = new HashMap<>(previousManifest.getCustomMetadataMap()); + final Map customsToUpload = remoteGlobalMetadataManager.getUpdatedCustoms(clusterState, previousClusterState); + final Map allUploadedCustomMap = new HashMap<>(previousManifest.getCustomMetadataMap()); + for (final String custom : clusterState.metadata().customs().keySet()) { + // remove all the customs which are present currently + customsToBeDeletedFromRemote.remove(custom); } - + final Map indicesToBeDeletedFromRemote = new HashMap<>(previousClusterState.metadata().indices()); int numIndicesUpdated = 0; int numIndicesUnchanged = 0; final Map allUploadedIndexMetadata = previousManifest.getIndices() @@ -313,9 +287,12 @@ public ClusterMetadataManifest writeIncrementalMetadata( .collect(Collectors.toMap(UploadedIndexMetadata::getIndexName, Function.identity())); List toUpload = new ArrayList<>(); - + // We prepare a map that contains the previous index metadata for the indexes for which version has changed. + Map prevIndexMetadataByName = new HashMap<>(); for (final IndexMetadata indexMetadata : clusterState.metadata().indices().values()) { - final Long previousVersion = previousStateIndexMetadataVersionByName.get(indexMetadata.getIndex().getName()); + String indexName = indexMetadata.getIndex().getName(); + final IndexMetadata prevIndexMetadata = indicesToBeDeletedFromRemote.get(indexName); + Long previousVersion = prevIndexMetadata != null ? prevIndexMetadata.getVersion() : null; if (previousVersion == null || indexMetadata.getVersion() != previousVersion) { logger.debug( "updating metadata for [{}], changing version from [{}] to [{}]", @@ -325,136 +302,171 @@ public ClusterMetadataManifest writeIncrementalMetadata( ); numIndicesUpdated++; toUpload.add(indexMetadata); + prevIndexMetadataByName.put(indexName, prevIndexMetadata); } else { numIndicesUnchanged++; } - previousStateIndexMetadataVersionByName.remove(indexMetadata.getIndex().getName()); + // index present in current cluster state + indicesToBeDeletedFromRemote.remove(indexMetadata.getIndex().getName()); + } + + List indicesRoutingToUpload = new ArrayList<>(); + if(remoteRoutingTableService!=null) { + indicesRoutingToUpload = remoteRoutingTableService.getChangedIndicesRouting(previousClusterState, clusterState); + } + UploadedMetadataResults uploadedMetadataResults; + // For migration case from codec V0 or V1 to V2, we have added null check on metadata attribute files, + // If file is empty and codec is 1 then write global metadata. + boolean firstUploadForSplitGlobalMetadata = !previousManifest.hasMetadataAttributesFiles(); + boolean updateCoordinationMetadata = firstUploadForSplitGlobalMetadata + || Metadata.isCoordinationMetadataEqual(previousClusterState.metadata(), clusterState.metadata()) == false; + ; + boolean updateSettingsMetadata = firstUploadForSplitGlobalMetadata + || Metadata.isSettingsMetadataEqual(previousClusterState.metadata(), clusterState.metadata()) == false; + boolean updateTemplatesMetadata = firstUploadForSplitGlobalMetadata + || Metadata.isTemplatesMetadataEqual(previousClusterState.metadata(), clusterState.metadata()) == false; + // Write Global Metadata + final boolean updateGlobalMetadata = Metadata.isGlobalStateEquals( + previousClusterState.metadata(), + clusterState.metadata() + ) == false; + // ToDo: check if these needs to be updated or not + final boolean updateDiscoveryNodes = clusterState.getNodes().delta(previousClusterState.getNodes()).hasChanges(); + final boolean updateClusterBlocks = clusterState.blocks().equals(previousClusterState.blocks()); + + // Write Index Metadata + final Map previousStateIndexMetadataByName = new HashMap<>(); + for (final IndexMetadata indexMetadata : previousClusterState.metadata().indices().values()) { + previousStateIndexMetadataByName.put(indexMetadata.getIndex().getName(), indexMetadata); } - List uploadedIndexMetadataList = writeIndexMetadataParallel(clusterState, toUpload); - uploadedIndexMetadataList.forEach( + + uploadedMetadataResults = writeMetadataInParallel( + clusterState, + toUpload, + prevIndexMetadataByName, + firstUploadForSplitGlobalMetadata ? clusterState.metadata().customs() : customsToUpload, + updateCoordinationMetadata, + updateSettingsMetadata, + updateTemplatesMetadata, + updateDiscoveryNodes, + updateClusterBlocks, + indicesRoutingToUpload + ); + + + // update the map if the metadata was uploaded + uploadedMetadataResults.uploadedIndexMetadata.forEach( uploadedIndexMetadata -> allUploadedIndexMetadata.put(uploadedIndexMetadata.getIndexName(), uploadedIndexMetadata) ); - - for (String removedIndexName : previousStateIndexMetadataVersionByName.keySet()) { - allUploadedIndexMetadata.remove(removedIndexName); + allUploadedCustomMap.putAll(uploadedMetadataResults.uploadedCustomMetadataMap); + // remove the data for removed custom/indices + customsToBeDeletedFromRemote.keySet().forEach(allUploadedCustomMap::remove); + indicesToBeDeletedFromRemote.keySet().forEach(allUploadedIndexMetadata::remove); + + List allUploadedIndicesRouting = new ArrayList<>(); + if(remoteRoutingTableService!=null) { + allUploadedIndicesRouting = remoteRoutingTableService.getAllUploadedIndicesRouting(previousManifest, uploadedMetadataResults.uploadedIndicesRoutingMetadata, indicesToBeDeletedFromRemote.keySet()); } - final ClusterMetadataManifest manifest = uploadManifest( + + final ClusterMetadataManifest manifest = remoteManifestManager.uploadManifest( clusterState, new ArrayList<>(allUploadedIndexMetadata.values()), previousManifest.getPreviousClusterUUID(), - globalMetadataFile, - false + updateCoordinationMetadata ? uploadedMetadataResults.uploadedCoordinationMetadata : previousManifest.getCoordinationMetadata(), + updateSettingsMetadata ? uploadedMetadataResults.uploadedSettingsMetadata : previousManifest.getSettingsMetadata(), + updateTemplatesMetadata ? uploadedMetadataResults.uploadedTemplatesMetadata : previousManifest.getTemplatesMetadata(), + firstUploadForSplitGlobalMetadata || !customsToUpload.isEmpty() + ? allUploadedCustomMap + : previousManifest.getCustomMetadataMap(), + firstUploadForSplitGlobalMetadata || updateDiscoveryNodes ? uploadedMetadataResults.uploadedDiscoveryNodes : previousManifest.getDiscoveryNodesMetadata(), + firstUploadForSplitGlobalMetadata || updateClusterBlocks ? uploadedMetadataResults.uploadedClusterBlocks : previousManifest.getClusterBlocksMetadata(), + new ClusterStateDiffManifest(clusterState, previousClusterState), + allUploadedIndicesRouting, false ); - deleteStaleClusterMetadata(clusterState.getClusterName().value(), clusterState.metadata().clusterUUID(), RETAINED_MANIFESTS); + + logger.info("MANIFEST IN INC STATE {}", manifest); + + this.latestClusterName = clusterState.getClusterName().value(); + this.latestClusterUUID = clusterState.metadata().clusterUUID(); final long durationMillis = TimeValue.nsecToMSec(relativeTimeNanosSupplier.getAsLong() - startTimeNanos); remoteStateStats.stateSucceeded(); remoteStateStats.stateTook(durationMillis); + ParameterizedMessage clusterStateUploadTimeMessage = new ParameterizedMessage( + CLUSTER_STATE_UPLOAD_TIME_LOG_STRING, + manifest.getStateVersion(), + durationMillis + ); + ParameterizedMessage metadataUpdateMessage = new ParameterizedMessage( + METADATA_UPDATE_LOG_STRING, + numIndicesUpdated, + numIndicesUnchanged, + updateCoordinationMetadata, + updateSettingsMetadata, + updateTemplatesMetadata, + customsToUpload.size() + ); if (durationMillis >= slowWriteLoggingThreshold.getMillis()) { logger.warn( + "{} which is above the warn threshold of [{}]; {}", + clusterStateUploadTimeMessage, "writing cluster state took [{}ms] which is above the warn threshold of [{}]; " - + "wrote metadata for [{}] indices and skipped [{}] unchanged indices, global metadata updated : [{}]", + + "wrote metadata for [{}] indices and skipped [{}] unchanged indices, coordination metadata updated : [{}], " + + "settings metadata updated : [{}], templates metadata updated : [{}], custom metadata updated : [{}]", durationMillis, slowWriteLoggingThreshold, numIndicesUpdated, numIndicesUnchanged, - updateGlobalMetadata + updateCoordinationMetadata, + updateSettingsMetadata, + updateTemplatesMetadata, + customsToUpload.size() ); } else { + logger.info("{}; {}", clusterStateUploadTimeMessage, metadataUpdateMessage); logger.info( "writing cluster state for version [{}] took [{}ms]; " - + "wrote metadata for [{}] indices and skipped [{}] unchanged indices, global metadata updated : [{}]", + + "wrote metadata for [{}] indices and skipped [{}] unchanged indices, coordination metadata updated : [{}], " + + "settings metadata updated : [{}], templates metadata updated : [{}], custom metadata updated : [{}]", manifest.getStateVersion(), durationMillis, numIndicesUpdated, numIndicesUnchanged, - updateGlobalMetadata + updateCoordinationMetadata, + updateSettingsMetadata, + updateTemplatesMetadata, + customsToUpload.size() ); } return manifest; } - /** - * Uploads provided ClusterState's global Metadata to remote store in parallel. - * The call is blocking so the method waits for upload to finish and then return. - * - * @param clusterState current ClusterState - * @return String file name where globalMetadata file is stored. - */ - private String writeGlobalMetadata(ClusterState clusterState) throws IOException { - - AtomicReference result = new AtomicReference(); - AtomicReference exceptionReference = new AtomicReference(); - - final BlobContainer globalMetadataContainer = globalMetadataContainer( - clusterState.getClusterName().value(), - clusterState.metadata().clusterUUID() - ); - final String globalMetadataFilename = globalMetadataFileName(clusterState.metadata()); - - // latch to wait until upload is not finished - CountDownLatch latch = new CountDownLatch(1); - - LatchedActionListener completionListener = new LatchedActionListener<>(ActionListener.wrap(resp -> { - logger.trace(String.format(Locale.ROOT, "GlobalMetadata uploaded successfully.")); - result.set(globalMetadataContainer.path().buildAsString() + globalMetadataFilename); - }, ex -> { exceptionReference.set(ex); }), latch); - - GLOBAL_METADATA_FORMAT.writeAsyncWithUrgentPriority( - clusterState.metadata(), - globalMetadataContainer, - globalMetadataFilename, - blobStoreRepository.getCompressor(), - completionListener, - FORMAT_PARAMS - ); - - try { - if (latch.await(getGlobalMetadataUploadTimeout().millis(), TimeUnit.MILLISECONDS) == false) { - // TODO: We should add metrics where transfer is timing out. [Issue: #10687] - RemoteStateTransferException ex = new RemoteStateTransferException( - String.format(Locale.ROOT, "Timed out waiting for transfer of global metadata to complete") - ); - throw ex; - } - } catch (InterruptedException ex) { - RemoteStateTransferException exception = new RemoteStateTransferException( - String.format(Locale.ROOT, "Timed out waiting for transfer of global metadata to complete - %s"), - ex - ); - Thread.currentThread().interrupt(); - throw exception; - } - if (exceptionReference.get() != null) { - throw new RemoteStateTransferException(exceptionReference.get().getMessage(), exceptionReference.get()); - } - return result.get(); - } - - /** - * Uploads provided IndexMetadata's to remote store in parallel. The call is blocking so the method waits for upload to finish and then return. - * - * @param clusterState current ClusterState - * @param toUpload list of IndexMetadata to upload - * @return {@code List} list of IndexMetadata uploaded to remote - */ - private List writeIndexMetadataParallel(ClusterState clusterState, List toUpload) - throws IOException { - List exceptionList = Collections.synchronizedList(new ArrayList<>(toUpload.size())); - final CountDownLatch latch = new CountDownLatch(toUpload.size()); - List result = new ArrayList<>(toUpload.size()); - - LatchedActionListener latchedActionListener = new LatchedActionListener<>( - ActionListener.wrap((UploadedIndexMetadata uploadedIndexMetadata) -> { - logger.trace( - String.format(Locale.ROOT, "IndexMetadata uploaded successfully for %s", uploadedIndexMetadata.getIndexName()) - ); - result.add(uploadedIndexMetadata); + private UploadedMetadataResults writeMetadataInParallel( + ClusterState clusterState, + List indexToUpload, + Map prevIndexMetadataByName, + Map customToUpload, + boolean uploadCoordinationMetadata, + boolean uploadSettingsMetadata, + boolean uploadTemplateMetadata, + boolean uploadDiscoveryNodes, + boolean uploadClusterBlock, + List indicesRoutingToUpload) throws IOException { + int totalUploadTasks = indexToUpload.size() + customToUpload.size() + (uploadCoordinationMetadata ? 1 : 0) + (uploadSettingsMetadata + ? 1 : 0) + (uploadTemplateMetadata ? 1 : 0) + (uploadDiscoveryNodes ? 1 : 0) + (uploadClusterBlock ? 1 : 0) + indicesRoutingToUpload.size(); + CountDownLatch latch = new CountDownLatch(totalUploadTasks); + Map> uploadTasks = new HashMap<>(totalUploadTasks); + Map results = new HashMap<>(totalUploadTasks); + List exceptionList = Collections.synchronizedList(new ArrayList<>(totalUploadTasks)); + + LatchedActionListener listener = new LatchedActionListener<>( + ActionListener.wrap((ClusterMetadataManifest.UploadedMetadata uploadedMetadata) -> { + logger.info(String.format(Locale.ROOT, "Metadata component %s uploaded successfully.", uploadedMetadata.getComponent())); + results.put(uploadedMetadata.getComponent(), uploadedMetadata); }, ex -> { - assert ex instanceof RemoteStateTransferException; logger.error( - () -> new ParameterizedMessage("Exception during transfer of IndexMetadata to Remote {}", ex.getMessage()), + () -> new ParameterizedMessage("Exception during transfer of Metadata Fragment to Remote {}", ex.getMessage()), ex ); exceptionList.add(ex); @@ -462,18 +474,109 @@ private List writeIndexMetadataParallel(ClusterState clus latch ); - for (IndexMetadata indexMetadata : toUpload) { - // 123456789012_test-cluster/cluster-state/dsgYj10Nkso7/index/ftqsCnn9TgOX/metadata_4_1690947200 - writeIndexMetadataAsync(clusterState, indexMetadata, latchedActionListener); + if (uploadSettingsMetadata) { + uploadTasks.put( + SETTING_METADATA, + remoteGlobalMetadataManager.getAsyncMetadataWriteAction( + clusterState.metadata().persistentSettings(), + clusterState.metadata().version(), + clusterState.metadata().clusterUUID(), + listener, + null + ) + ); + } + if (uploadCoordinationMetadata) { + uploadTasks.put( + COORDINATION_METADATA, + remoteGlobalMetadataManager.getAsyncMetadataWriteAction( + clusterState.metadata().coordinationMetadata(), + clusterState.metadata().version(), + clusterState.metadata().clusterUUID(), + listener, + null + ) + ); + } + if (uploadTemplateMetadata) { + uploadTasks.put( + TEMPLATES_METADATA, + remoteGlobalMetadataManager.getAsyncMetadataWriteAction( + clusterState.metadata().templatesMetadata(), + clusterState.metadata().version(), + clusterState.metadata().clusterUUID(), + listener, + null + ) + ); + } + if (uploadDiscoveryNodes) { + uploadTasks.put( + DISCOVERY_NODES, + remoteClusterStateAttributesManager.getAsyncMetadataWriteAction( + clusterState, + DISCOVERY_NODES, + clusterState.nodes(), + listener + ) + ); + } + if (uploadClusterBlock) { + uploadTasks.put( + CLUSTER_BLOCKS, + remoteClusterStateAttributesManager.getAsyncMetadataWriteAction( + clusterState, + CLUSTER_BLOCKS, + clusterState.blocks(), + listener + ) + ); + } + customToUpload.forEach((key, value) -> { + String customComponent = String.join(CUSTOM_DELIMITER, CUSTOM_METADATA, key); + uploadTasks.put( + customComponent, + remoteGlobalMetadataManager.getAsyncMetadataWriteAction( + value, + clusterState.metadata().version(), + clusterState.metadata().clusterUUID(), + listener, + key + ) + ); + }); + indexToUpload.forEach(indexMetadata -> { + uploadTasks.put( + indexMetadata.getIndexName(), + remoteIndexMetadataManager.getIndexMetadataAsyncAction(indexMetadata, clusterState.metadata().clusterUUID(), listener) + ); + }); + + indicesRoutingToUpload.forEach(indexRoutingTable -> { + try { + uploadTasks.put( + indexRoutingTable.getIndex().getName() + "--indexRouting", + remoteRoutingTableService.getIndexRoutingAsyncAction(clusterState, indexRoutingTable, listener) + ); + } catch (IOException e) { + e.printStackTrace(); + } + }); + + // start async upload of all required metadata files + for (CheckedRunnable uploadTask : uploadTasks.values()) { + uploadTask.run(); } + invokeIndexMetadataUploadListeners(indexToUpload, prevIndexMetadataByName, latch, exceptionList); try { - if (latch.await(getIndexMetadataUploadTimeout().millis(), TimeUnit.MILLISECONDS) == false) { + if (latch.await(remoteGlobalMetadataManager.getGlobalMetadataUploadTimeout().millis(), TimeUnit.MILLISECONDS) == false) { + // TODO: We should add metrics where transfer is timing out. [Issue: #10687] RemoteStateTransferException ex = new RemoteStateTransferException( String.format( Locale.ROOT, - "Timed out waiting for transfer of index metadata to complete - %s", - toUpload.stream().map(IndexMetadata::getIndex).map(Index::toString).collect(Collectors.joining("")) + "Timed out waiting for transfer of following metadata to complete - %s", + String.join(", ", uploadTasks.keySet()) ) ); exceptionList.forEach(ex::addSuppressed); @@ -484,65 +587,116 @@ private List writeIndexMetadataParallel(ClusterState clus RemoteStateTransferException exception = new RemoteStateTransferException( String.format( Locale.ROOT, - "Timed out waiting for transfer of index metadata to complete - %s", - toUpload.stream().map(IndexMetadata::getIndex).map(Index::toString).collect(Collectors.joining("")) + "Timed out waiting for transfer of metadata to complete - %s", + String.join(", ", uploadTasks.keySet()) ), ex ); Thread.currentThread().interrupt(); throw exception; } - if (exceptionList.size() > 0) { + if (!exceptionList.isEmpty()) { RemoteStateTransferException exception = new RemoteStateTransferException( String.format( Locale.ROOT, - "Exception during transfer of IndexMetadata to Remote %s", - toUpload.stream().map(IndexMetadata::getIndex).map(Index::toString).collect(Collectors.joining("")) + "Exception during transfer of following metadata to Remote - %s", + String.join(", ", uploadTasks.keySet()) ) ); exceptionList.forEach(exception::addSuppressed); throw exception; } - return result; + UploadedMetadataResults response = new UploadedMetadataResults(); + results.forEach((name, uploadedMetadata) -> { + if (uploadedMetadata.getClass().equals(UploadedIndexMetadata.class) && + uploadedMetadata.getComponent().contains(RemoteRoutingTableService.INDEX_ROUTING_METADATA_PREFIX)) { + response.uploadedIndicesRoutingMetadata.add((UploadedIndexMetadata) uploadedMetadata); + } else if (name.contains(CUSTOM_METADATA)) { + // component name for custom metadata will look like custom-- + String custom = name.split(DELIMITER)[0].split(CUSTOM_DELIMITER)[1]; + response.uploadedCustomMetadataMap.put( + custom, + new UploadedMetadataAttribute(custom, uploadedMetadata.getUploadedFilename()) + ); + } else if (COORDINATION_METADATA.equals(name)) { + response.uploadedCoordinationMetadata = (UploadedMetadataAttribute) uploadedMetadata; + } else if (SETTING_METADATA.equals(name)) { + response.uploadedSettingsMetadata = (UploadedMetadataAttribute) uploadedMetadata; + } else if (TEMPLATES_METADATA.equals(name)) { + response.uploadedTemplatesMetadata = (UploadedMetadataAttribute) uploadedMetadata; + } else if (name.contains(UploadedIndexMetadata.COMPONENT_PREFIX)) { + response.uploadedIndexMetadata.add((UploadedIndexMetadata) uploadedMetadata); + } else if (DISCOVERY_NODES.equals(uploadedMetadata.getComponent())) { + response.uploadedDiscoveryNodes = (UploadedMetadataAttribute) uploadedMetadata; + } else if (CLUSTER_BLOCKS.equals(uploadedMetadata.getComponent())) { + response.uploadedClusterBlocks = (UploadedMetadataAttribute) uploadedMetadata; + } else { + throw new IllegalStateException("Unexpected metadata component " + uploadedMetadata.getComponent()); + } + }); + logger.info("response {}", response.uploadedIndicesRoutingMetadata.toString()); + return response; } /** - * Allows async Upload of IndexMetadata to remote - * - * @param clusterState current ClusterState - * @param indexMetadata {@link IndexMetadata} to upload - * @param latchedActionListener listener to respond back on after upload finishes + * Invokes the index metadata upload listener but does not wait for the execution to complete. */ - private void writeIndexMetadataAsync( - ClusterState clusterState, - IndexMetadata indexMetadata, - LatchedActionListener latchedActionListener - ) throws IOException { - final BlobContainer indexMetadataContainer = indexMetadataContainer( - clusterState.getClusterName().value(), - clusterState.metadata().clusterUUID(), - indexMetadata.getIndexUUID() - ); - final String indexMetadataFilename = indexMetadataFileName(indexMetadata); - ActionListener completionListener = ActionListener.wrap( - resp -> latchedActionListener.onResponse( - new UploadedIndexMetadata( - indexMetadata.getIndex().getName(), - indexMetadata.getIndexUUID(), - indexMetadataContainer.path().buildAsString() + indexMetadataFilename - ) + private void invokeIndexMetadataUploadListeners( + List updatedIndexMetadataList, + Map prevIndexMetadataByName, + CountDownLatch latch, + List exceptionList + ) { + for (IndexMetadataUploadListener listener : indexMetadataUploadListeners) { + String listenerName = listener.getClass().getSimpleName(); + listener.onUpload( + updatedIndexMetadataList, + prevIndexMetadataByName, + getIndexMetadataUploadActionListener(updatedIndexMetadataList, prevIndexMetadataByName, latch, exceptionList, listenerName) + ); + } + + } + + private ActionListener getIndexMetadataUploadActionListener( + List newIndexMetadataList, + Map prevIndexMetadataByName, + CountDownLatch latch, + List exceptionList, + String listenerName + ) { + long startTime = System.nanoTime(); + return new LatchedActionListener<>( + ActionListener.wrap( + ignored -> logger.trace( + new ParameterizedMessage( + "listener={} : Invoked successfully with indexMetadataList={} prevIndexMetadataList={} tookTimeNs={}", + listenerName, + newIndexMetadataList, + prevIndexMetadataByName.values(), + (System.nanoTime() - startTime) + ) + ), + ex -> { + logger.error( + new ParameterizedMessage( + "listener={} : Exception during invocation with indexMetadataList={} prevIndexMetadataList={} tookTimeNs={}", + listenerName, + newIndexMetadataList, + prevIndexMetadataByName.values(), + (System.nanoTime() - startTime) + ), + ex + ); + exceptionList.add(ex); + } ), - ex -> latchedActionListener.onFailure(new RemoteStateTransferException(indexMetadata.getIndex().toString(), ex)) + latch ); + } - INDEX_METADATA_FORMAT.writeAsyncWithUrgentPriority( - indexMetadata, - indexMetadataContainer, - indexMetadataFilename, - blobStoreRepository.getCompressor(), - completionListener, - FORMAT_PARAMS - ); + public RemoteManifestManager getRemoteManifestManager() { + return remoteManifestManager; } @Nullable @@ -554,22 +708,50 @@ public ClusterMetadataManifest markLastStateAsCommitted(ClusterState clusterStat return null; } assert previousManifest != null : "Last cluster metadata manifest is not set"; - ClusterMetadataManifest committedManifest = uploadManifest( + ClusterMetadataManifest committedManifest = remoteManifestManager.uploadManifest( clusterState, previousManifest.getIndices(), previousManifest.getPreviousClusterUUID(), - previousManifest.getGlobalMetadataFileName(), - true + previousManifest.getCoordinationMetadata(), + previousManifest.getSettingsMetadata(), + previousManifest.getTemplatesMetadata(), + previousManifest.getCustomMetadataMap(), + previousManifest.getDiscoveryNodesMetadata(), + previousManifest.getClusterBlocksMetadata(), + previousManifest.getDiffManifest(), + previousManifest.getIndicesRouting(), true ); - deleteStaleClusterUUIDs(clusterState, committedManifest); + if (!previousManifest.isClusterUUIDCommitted() && committedManifest.isClusterUUIDCommitted()) { + remoteClusterStateCleanupManager.deleteStaleClusterUUIDs(clusterState, committedManifest); + } + return committedManifest; } + /** + * Fetch latest ClusterMetadataManifest from remote state store + * + * @param clusterUUID uuid of cluster state to refer to in remote + * @param clusterName name of the cluster + * @return ClusterMetadataManifest + */ + public Optional getLatestClusterMetadataManifest(String clusterName, String clusterUUID) { + return remoteManifestManager.getLatestClusterMetadataManifest(clusterName, clusterUUID); + } + + public Optional getClusterMetadataManifestByTermVersion(String clusterName, String clusterUUID, long term, long version) { + return remoteManifestManager.getClusterMetadataManifestByTermVersion(clusterName, clusterUUID, term, version); + } + @Override public void close() throws IOException { + remoteClusterStateCleanupManager.close(); if (blobStoreRepository != null) { IOUtils.close(blobStoreRepository); } + if(this.remoteRoutingTableService != null) { + this.remoteRoutingTableService.close(); + } } public void start() { @@ -581,304 +763,365 @@ public void start() { final Repository repository = repositoriesService.get().repository(remoteStoreRepo); assert repository instanceof BlobStoreRepository : "Repository should be instance of BlobStoreRepository"; blobStoreRepository = (BlobStoreRepository) repository; - } - - private ClusterMetadataManifest uploadManifest( - ClusterState clusterState, - List uploadedIndexMetadata, - String previousClusterUUID, - String globalClusterMetadataFileName, - boolean committed - ) throws IOException { - synchronized (this) { - final String manifestFileName = getManifestFileName(clusterState.term(), clusterState.version(), committed); - final ClusterMetadataManifest manifest = new ClusterMetadataManifest( - clusterState.term(), - clusterState.getVersion(), - clusterState.metadata().clusterUUID(), - clusterState.stateUUID(), - Version.CURRENT, - nodeId, - committed, - MANIFEST_CURRENT_CODEC_VERSION, - globalClusterMetadataFileName, - uploadedIndexMetadata, - previousClusterUUID, - clusterState.metadata().clusterUUIDCommitted() - ); - writeMetadataManifest(clusterState.getClusterName().value(), clusterState.metadata().clusterUUID(), manifest, manifestFileName); - return manifest; - } - } - - private void writeMetadataManifest(String clusterName, String clusterUUID, ClusterMetadataManifest uploadManifest, String fileName) - throws IOException { - AtomicReference result = new AtomicReference(); - AtomicReference exceptionReference = new AtomicReference(); - - final BlobContainer metadataManifestContainer = manifestContainer(clusterName, clusterUUID); - - // latch to wait until upload is not finished - CountDownLatch latch = new CountDownLatch(1); - - LatchedActionListener completionListener = new LatchedActionListener<>(ActionListener.wrap(resp -> { - logger.trace(String.format(Locale.ROOT, "Manifest file uploaded successfully.")); - }, ex -> { exceptionReference.set(ex); }), latch); - - CLUSTER_METADATA_MANIFEST_FORMAT.writeAsyncWithUrgentPriority( - uploadManifest, - metadataManifestContainer, - fileName, - blobStoreRepository.getCompressor(), - completionListener, - FORMAT_PARAMS - ); - - try { - if (latch.await(getMetadataManifestUploadTimeout().millis(), TimeUnit.MILLISECONDS) == false) { - RemoteStateTransferException ex = new RemoteStateTransferException( - String.format(Locale.ROOT, "Timed out waiting for transfer of manifest file to complete") - ); - throw ex; - } - } catch (InterruptedException ex) { - RemoteStateTransferException exception = new RemoteStateTransferException( - String.format(Locale.ROOT, "Timed out waiting for transfer of manifest file to complete - %s"), - ex - ); - Thread.currentThread().interrupt(); - throw exception; - } - if (exceptionReference.get() != null) { - throw new RemoteStateTransferException(exceptionReference.get().getMessage(), exceptionReference.get()); + if(this.remoteRoutingTableService != null) { + this.remoteRoutingTableService.start(); } - logger.debug( - "Metadata manifest file [{}] written during [{}] phase. ", - fileName, - uploadManifest.isCommitted() ? "commit" : "publish" - ); + String clusterName = ClusterName.CLUSTER_NAME_SETTING.get(settings).value(); + remoteGlobalMetadataManager = new RemoteGlobalMetadataManager(blobStoreRepository, clusterSettings,threadpool, getBlobStoreTransferService(), clusterName); + remoteIndexMetadataManager = new RemoteIndexMetadataManager(blobStoreRepository, clusterSettings,threadpool, clusterName, getBlobStoreTransferService()); + remoteClusterStateAttributesManager = new RemoteClusterStateAttributesManager(getBlobStoreTransferService(), blobStoreRepository, threadpool, clusterName); + remoteManifestManager = new RemoteManifestManager(blobStoreRepository, clusterSettings, nodeId); + remoteClusterStateCleanupManager.start(); } - private String fetchPreviousClusterUUID(String clusterName, String clusterUUID) { - final Optional latestManifest = getLatestClusterMetadataManifest(clusterName, clusterUUID); - if (!latestManifest.isPresent()) { - final String previousClusterUUID = getLastKnownUUIDFromRemote(clusterName); - assert !clusterUUID.equals(previousClusterUUID) : "Last cluster UUID is same current cluster UUID"; - return previousClusterUUID; - } - return latestManifest.get().getPreviousClusterUUID(); + private void setSlowWriteLoggingThreshold(TimeValue slowWriteLoggingThreshold) { + this.slowWriteLoggingThreshold = slowWriteLoggingThreshold; } - private BlobContainer indexMetadataContainer(String clusterName, String clusterUUID, String indexUUID) { - // 123456789012_test-cluster/cluster-state/dsgYj10Nkso7/index/ftqsCnn9TgOX - return blobStoreRepository.blobStore() - .blobContainer(getCusterMetadataBasePath(clusterName, clusterUUID).add(INDEX_PATH_TOKEN).add(indexUUID)); + //Package private for unit test + RemoteRoutingTableService getRemoteRoutingTableService() { + return this.remoteRoutingTableService; } - private BlobContainer globalMetadataContainer(String clusterName, String clusterUUID) { - // 123456789012_test-cluster/cluster-state/dsgYj10Nkso7/global-metadata/ - return blobStoreRepository.blobStore() - .blobContainer(getCusterMetadataBasePath(clusterName, clusterUUID).add(GLOBAL_METADATA_PATH_TOKEN)); + ThreadPool getThreadpool() { + return threadpool; } - private BlobContainer manifestContainer(String clusterName, String clusterUUID) { - // 123456789012_test-cluster/cluster-state/dsgYj10Nkso7/manifest - return blobStoreRepository.blobStore().blobContainer(getManifestFolderPath(clusterName, clusterUUID)); + BlobStoreRepository getBlobStoreRepository() { + return blobStoreRepository; } - private BlobPath getCusterMetadataBasePath(String clusterName, String clusterUUID) { - return blobStoreRepository.basePath().add(encodeString(clusterName)).add(CLUSTER_STATE_PATH_TOKEN).add(clusterUUID); + BlobStore getBlobStore() { + return blobStoreRepository.blobStore(); } - private BlobContainer clusterUUIDContainer(String clusterName) { - return blobStoreRepository.blobStore() - .blobContainer( - blobStoreRepository.basePath() - .add(Base64.getUrlEncoder().withoutPadding().encodeToString(clusterName.getBytes(StandardCharsets.UTF_8))) - .add(CLUSTER_STATE_PATH_TOKEN) - ); - } - - private void setSlowWriteLoggingThreshold(TimeValue slowWriteLoggingThreshold) { - this.slowWriteLoggingThreshold = slowWriteLoggingThreshold; - } - - private void setIndexMetadataUploadTimeout(TimeValue newIndexMetadataUploadTimeout) { - this.indexMetadataUploadTimeout = newIndexMetadataUploadTimeout; - } - - private void setGlobalMetadataUploadTimeout(TimeValue newGlobalMetadataUploadTimeout) { - this.globalMetadataUploadTimeout = newGlobalMetadataUploadTimeout; - } - - private void setMetadataManifestUploadTimeout(TimeValue newMetadataManifestUploadTimeout) { - this.metadataManifestUploadTimeout = newMetadataManifestUploadTimeout; - } + /** + * Fetch latest ClusterState from remote, including global metadata, index metadata and cluster state version + * + * @param clusterUUID uuid of cluster state to refer to in remote + * @param clusterName name of the cluster + * @return {@link IndexMetadata} + */ + public ClusterState getLatestClusterState(String clusterName, String clusterUUID) throws IOException { + start(); + Optional clusterMetadataManifest = remoteManifestManager.getLatestClusterMetadataManifest( + clusterName, + clusterUUID + ); + if (clusterMetadataManifest.isEmpty()) { + throw new IllegalStateException( + String.format(Locale.ROOT, "Latest cluster metadata manifest is not present for the provided clusterUUID: %s", clusterUUID) + ); + } - public TimeValue getIndexMetadataUploadTimeout() { - return this.indexMetadataUploadTimeout; + return getClusterStateForManifest(clusterName, clusterMetadataManifest.get(), nodeId); } - public TimeValue getGlobalMetadataUploadTimeout() { - return this.globalMetadataUploadTimeout; - } + private ClusterState readClusterStateInParallel( + ClusterState previousState, + ClusterMetadataManifest manifest, + String clusterName, + String clusterUUID, + String localNodeId, + List indicesToRead, + Map customToRead, + boolean readCoordinationMetadata, + boolean readSettingsMetadata, + boolean readTemplatesMetadata, + boolean readDiscoveryNodes, + boolean readClusterBlocks, + List indicesRoutingToRead + ) throws IOException { + int totalReadTasks = indicesToRead.size() + customToRead.size() + indicesRoutingToRead.size() + (readCoordinationMetadata ? 1 : 0) + (readSettingsMetadata ? 1 : 0) + (readTemplatesMetadata ? 1 : 0) + (readDiscoveryNodes ? 1 : 0) + (readClusterBlocks ? 1 : 0); + CountDownLatch latch = new CountDownLatch(totalReadTasks); + List> asyncMetadataReadActions = new ArrayList<>(); + List readResults = new ArrayList<>(); + List readIndexRoutingTableResults = new ArrayList<>(); + List exceptionList = Collections.synchronizedList(new ArrayList<>(totalReadTasks)); + + LatchedActionListener listener = new LatchedActionListener<>( + ActionListener.wrap( + response -> { + logger.debug("Successfully read cluster state component from remote"); + readResults.add(response); + }, + ex -> { + logger.error("Failed to read cluster state from remote", ex); + exceptionList.add(ex); + } + ), + latch + ); - public TimeValue getMetadataManifestUploadTimeout() { - return this.metadataManifestUploadTimeout; - } + for (UploadedIndexMetadata indexMetadata : indicesToRead) { + asyncMetadataReadActions.add( + remoteIndexMetadataManager.getAsyncIndexMetadataReadAction( + clusterUUID, + indexMetadata.getUploadedFilename(), + listener + ) + ); + } - static String getManifestFileName(long term, long version, boolean committed) { - // 123456789012_test-cluster/cluster-state/dsgYj10Nkso7/manifest/manifest______C/P____ - return String.join( - DELIMITER, - MANIFEST_PATH_TOKEN, - RemoteStoreUtils.invertLong(term), - RemoteStoreUtils.invertLong(version), - (committed ? "C" : "P"), // C for committed and P for published - RemoteStoreUtils.invertLong(System.currentTimeMillis()), - String.valueOf(MANIFEST_CURRENT_CODEC_VERSION) // Keep the codec version at last place only, during read we reads last place to - // determine codec version. + LatchedActionListener routingTableLatchedActionListener = new LatchedActionListener<>( + ActionListener.wrap( + response -> { + logger.debug("Successfully read cluster state component from remote"); + readIndexRoutingTableResults.add(response); + }, + ex -> { + logger.error("Failed to read cluster state from remote", ex); + exceptionList.add(ex); + } + ), + latch ); - } - static String indexMetadataFileName(IndexMetadata indexMetadata) { - // 123456789012_test-cluster/cluster-state/dsgYj10Nkso7/index//metadata______ - return String.join( - DELIMITER, - METADATA_FILE_PREFIX, - RemoteStoreUtils.invertLong(indexMetadata.getVersion()), - RemoteStoreUtils.invertLong(System.currentTimeMillis()), - String.valueOf(INDEX_METADATA_CURRENT_CODEC_VERSION) // Keep the codec version at last place only, during read we reads last - // place to determine codec version. - ); - } + for (UploadedIndexMetadata indexRouting: indicesRoutingToRead) { + asyncMetadataReadActions.add( + remoteRoutingTableService.getAsyncIndexMetadataReadAction( + indexRouting.getUploadedFilename(), + new Index(indexRouting.getIndexName(), indexRouting.getIndexUUID()), + routingTableLatchedActionListener + ) + ); + } - private static String globalMetadataFileName(Metadata metadata) { - // 123456789012_test-cluster/cluster-state/dsgYj10Nkso7/global-metadata/metadata______ - return String.join( - DELIMITER, - METADATA_FILE_PREFIX, - RemoteStoreUtils.invertLong(metadata.version()), - RemoteStoreUtils.invertLong(System.currentTimeMillis()), - String.valueOf(GLOBAL_METADATA_CURRENT_CODEC_VERSION) - ); - } + for (Map.Entry entry : customToRead.entrySet()) { + asyncMetadataReadActions.add( + remoteGlobalMetadataManager.getAsyncMetadataReadAction( + clusterName, + clusterUUID, + CUSTOM_METADATA, + entry.getKey(), + entry.getValue().getUploadedFilename(), + listener + ) + ); + } - private BlobPath getManifestFolderPath(String clusterName, String clusterUUID) { - return getCusterMetadataBasePath(clusterName, clusterUUID).add(MANIFEST_PATH_TOKEN); - } + if (readCoordinationMetadata) { + asyncMetadataReadActions.add( + remoteGlobalMetadataManager.getAsyncMetadataReadAction( + clusterName, + clusterUUID, + COORDINATION_METADATA, + COORDINATION_METADATA, + manifest.getCoordinationMetadata().getUploadedFilename(), + listener + ) + ); + } - /** - * Fetch latest index metadata from remote cluster state - * - * @param clusterUUID uuid of cluster state to refer to in remote - * @param clusterName name of the cluster - * @param clusterMetadataManifest manifest file of cluster - * @return {@code Map} latest IndexUUID to IndexMetadata map - */ - private Map getIndexMetadataMap( - String clusterName, - String clusterUUID, - ClusterMetadataManifest clusterMetadataManifest - ) { - assert Objects.equals(clusterUUID, clusterMetadataManifest.getClusterUUID()) - : "Corrupt ClusterMetadataManifest found. Cluster UUID mismatch."; - Map remoteIndexMetadata = new HashMap<>(); - for (UploadedIndexMetadata uploadedIndexMetadata : clusterMetadataManifest.getIndices()) { - IndexMetadata indexMetadata = getIndexMetadata(clusterName, clusterUUID, uploadedIndexMetadata); - remoteIndexMetadata.put(uploadedIndexMetadata.getIndexUUID(), indexMetadata); + if (readSettingsMetadata) { + asyncMetadataReadActions.add( + remoteGlobalMetadataManager.getAsyncMetadataReadAction( + clusterName, + clusterUUID, + SETTING_METADATA, + SETTING_METADATA, + manifest.getSettingsMetadata().getUploadedFilename(), + listener + ) + ); } - return remoteIndexMetadata; - } - /** - * Fetch index metadata from remote cluster state - * - * @param clusterUUID uuid of cluster state to refer to in remote - * @param clusterName name of the cluster - * @param uploadedIndexMetadata {@link UploadedIndexMetadata} contains details about remote location of index metadata - * @return {@link IndexMetadata} - */ - private IndexMetadata getIndexMetadata(String clusterName, String clusterUUID, UploadedIndexMetadata uploadedIndexMetadata) { - BlobContainer blobContainer = indexMetadataContainer(clusterName, clusterUUID, uploadedIndexMetadata.getIndexUUID()); - try { - String[] splitPath = uploadedIndexMetadata.getUploadedFilename().split("/"); - return INDEX_METADATA_FORMAT.read( - blobContainer, - splitPath[splitPath.length - 1], - blobStoreRepository.getNamedXContentRegistry() + if (readTemplatesMetadata) { + asyncMetadataReadActions.add( + remoteGlobalMetadataManager.getAsyncMetadataReadAction( + clusterName, + clusterUUID, + TEMPLATES_METADATA, + TEMPLATES_METADATA, + manifest.getTemplatesMetadata().getUploadedFilename(), + listener + ) ); - } catch (IOException e) { - throw new IllegalStateException( - String.format(Locale.ROOT, "Error while downloading IndexMetadata - %s", uploadedIndexMetadata.getUploadedFilename()), - e + } + + if (readDiscoveryNodes) { + asyncMetadataReadActions.add( + remoteClusterStateAttributesManager.getAsyncMetadataReadAction( + clusterUUID, + DISCOVERY_NODES, + manifest.getDiscoveryNodesMetadata().getUploadedFilename(), + listener + ) ); } - } - /** - * Fetch latest ClusterState from remote, including global metadata, index metadata and cluster state version - * - * @param clusterUUID uuid of cluster state to refer to in remote - * @param clusterName name of the cluster - * @return {@link IndexMetadata} - */ - public ClusterState getLatestClusterState(String clusterName, String clusterUUID) { - start(); - Optional clusterMetadataManifest = getLatestClusterMetadataManifest(clusterName, clusterUUID); - if (clusterMetadataManifest.isEmpty()) { - throw new IllegalStateException( - String.format(Locale.ROOT, "Latest cluster metadata manifest is not present for the provided clusterUUID: %s", clusterUUID) + if (readClusterBlocks) { + asyncMetadataReadActions.add( + remoteClusterStateAttributesManager.getAsyncMetadataReadAction( + clusterUUID, + CLUSTER_BLOCKS, + manifest.getClusterBlocksMetadata().getUploadedFilename(), + listener + ) ); } - // Fetch Global Metadata - Metadata globalMetadata = getGlobalMetadata(clusterName, clusterUUID, clusterMetadataManifest.get()); - // Fetch Index Metadata - Map indices = getIndexMetadataMap(clusterName, clusterUUID, clusterMetadataManifest.get()); + for (CheckedRunnable asyncMetadataReadAction : asyncMetadataReadActions) { + asyncMetadataReadAction.run(); + } + try { + if (latch.await(this.remoteStateReadTimeout.getMillis(), TimeUnit.MILLISECONDS) == false) { + RemoteStateTransferException exception = new RemoteStateTransferException( + "Timed out waiting to read cluster state from remote within timeout " + this.remoteStateReadTimeout + ); + exceptionList.forEach(exception::addSuppressed); + throw exception; + } + } catch (InterruptedException e) { + exceptionList.forEach(e::addSuppressed); + RemoteStateTransferException ex = new RemoteStateTransferException("Interrupted while waiting to read cluster state from metadata"); + Thread.currentThread().interrupt(); + throw ex; + } + + if (!exceptionList.isEmpty()) { + RemoteStateTransferException exception = new RemoteStateTransferException("Exception during reading cluster state from remote"); + exceptionList.forEach(exception::addSuppressed); + throw exception; + } + + ClusterState.Builder clusterStateBuilder = ClusterState.builder(previousState); + AtomicReference discoveryNodesBuilder = new AtomicReference<>(DiscoveryNodes.builder()); + Metadata.Builder metadataBuilder = Metadata.builder(previousState.metadata()); + metadataBuilder.version(manifest.getMetadataVersion()); + metadataBuilder.clusterUUID(manifest.getClusterUUID()); + metadataBuilder.clusterUUIDCommitted(manifest.isClusterUUIDCommitted()); Map indexMetadataMap = new HashMap<>(); - indices.values().forEach(indexMetadata -> { indexMetadataMap.put(indexMetadata.getIndex().getName(), indexMetadata); }); + Map indicesRouting = new HashMap<>(previousState.routingTable().getIndicesRouting()); + + readResults.forEach(remoteReadResult -> { + switch (remoteReadResult.getComponent()) { + case INDEX_PATH_TOKEN: + IndexMetadata indexMetadata = (IndexMetadata) remoteReadResult.getObj(); + indexMetadataMap.put(indexMetadata.getIndex().getName(), indexMetadata); + break; + case CUSTOM_METADATA: + metadataBuilder.putCustom(remoteReadResult.getComponentName(), (Metadata.Custom) remoteReadResult.getObj()); + break; + case COORDINATION_METADATA: + metadataBuilder.coordinationMetadata((CoordinationMetadata) remoteReadResult.getObj()); + break; + case SETTING_METADATA: + metadataBuilder.persistentSettings((Settings) remoteReadResult.getObj()); + break; + case TEMPLATES_METADATA: + metadataBuilder.templates((TemplatesMetadata) remoteReadResult.getObj()); + break; + case CLUSTER_STATE_ATTRIBUTE: + if (remoteReadResult.getComponentName().equals(DISCOVERY_NODES)) { + discoveryNodesBuilder.set(DiscoveryNodes.builder((DiscoveryNodes) remoteReadResult.getObj())); + } else if (remoteReadResult.getComponentName().equals(CLUSTER_BLOCKS)) { + clusterStateBuilder.blocks((ClusterBlocks) remoteReadResult.getObj()); + } + break; + default: + throw new IllegalStateException("Unknown component: " + remoteReadResult.getComponent()); + } + }); + + readIndexRoutingTableResults.forEach(remoteIndexRoutingResult -> { + indicesRouting.put(remoteIndexRoutingResult.getIndexName(), remoteIndexRoutingResult.getIndexRoutingTable()); + }); - return ClusterState.builder(ClusterState.EMPTY_STATE) - .version(clusterMetadataManifest.get().getStateVersion()) - .metadata(Metadata.builder(globalMetadata).indices(indexMetadataMap).build()) + metadataBuilder.indices(indexMetadataMap); + if (readDiscoveryNodes) { + clusterStateBuilder.nodes(discoveryNodesBuilder.get().localNodeId(localNodeId)); + } + return clusterStateBuilder.metadata(metadataBuilder) + .version(manifest.getStateVersion()) + .stateUUID(manifest.getStateUUID()) + .routingTable(new RoutingTable(manifest.getRoutingTableVersion(), indicesRouting)) .build(); } - private Metadata getGlobalMetadata(String clusterName, String clusterUUID, ClusterMetadataManifest clusterMetadataManifest) { - String globalMetadataFileName = clusterMetadataManifest.getGlobalMetadataFileName(); - try { - // Fetch Global metadata - if (globalMetadataFileName != null) { - String[] splitPath = globalMetadataFileName.split("/"); - return GLOBAL_METADATA_FORMAT.read( - globalMetadataContainer(clusterName, clusterUUID), - splitPath[splitPath.length - 1], - blobStoreRepository.getNamedXContentRegistry() - ); - } else { - return Metadata.EMPTY_METADATA; + public ClusterState getClusterStateForManifest(String clusterName, ClusterMetadataManifest manifest, String localNodeId) throws IOException { + return readClusterStateInParallel( + ClusterState.builder(new ClusterName(clusterName)).build(), + manifest, + clusterName, + manifest.getClusterUUID(), + localNodeId, + manifest.getIndices(), + manifest.getCustomMetadataMap(), + manifest.getCoordinationMetadata() != null, + manifest.getSettingsMetadata() != null, + manifest.getTemplatesMetadata() != null, + manifest.getDiscoveryNodesMetadata() != null, + manifest.getClusterBlocksMetadata() != null, + manifest.getIndicesRouting() + ); + } + + public ClusterState getClusterStateUsingDiff(String clusterName, ClusterMetadataManifest manifest, ClusterState previousState, String localNodeId) throws IOException { + assert manifest.getDiffManifest() != null; + ClusterStateDiffManifest diff = manifest.getDiffManifest(); + List updatedIndices = diff.getIndicesUpdated().stream().map(idx -> { + Optional uploadedIndexMetadataOptional = manifest.getIndices().stream().filter(idx2 -> idx2.getIndexName().equals(idx)).findFirst(); + assert uploadedIndexMetadataOptional.isPresent() == true; + return uploadedIndexMetadataOptional.get(); + }).collect(Collectors.toList()); + + List updatedIndexRouting = diff.getIndicesRoutingUpdated().stream().map(idx -> { + Optional uploadedIndexMetadataOptional = manifest.getIndicesRouting().stream().filter(idx2 -> idx2.getIndexName().equals(idx)).findFirst(); + assert uploadedIndexMetadataOptional.isPresent() == true; + return uploadedIndexMetadataOptional.get(); + }).collect(Collectors.toList()); + + Map updatedCustomMetadata = new HashMap<>(); + if (diff.getCustomMetadataUpdated() != null) { + for (String customType : diff.getCustomMetadataUpdated()) { + updatedCustomMetadata.put(customType, manifest.getCustomMetadataMap().get(customType)); + } + } + ClusterState updatedClusterState = readClusterStateInParallel( + previousState, + manifest, + clusterName, + manifest.getClusterUUID(), + localNodeId, + updatedIndices, + updatedCustomMetadata, + diff.isCoordinationMetadataUpdated(), + diff.isSettingsMetadataUpdated(), + diff.isTemplatesMetadataUpdated(), + diff.isDiscoveryNodesUpdated(), + diff.isClusterBlocksUpdated(), + updatedIndexRouting + ); + ClusterState.Builder clusterStateBuilder = ClusterState.builder(updatedClusterState); + Metadata.Builder metadataBuilder = Metadata.builder(updatedClusterState.metadata()); + // remove the deleted indices from the metadata + for (String index:diff.getIndicesDeleted()) { + metadataBuilder.remove(index); + } + // remove the deleted metadata customs from the metadata + if (diff.getCustomMetadataDeleted() != null) { + for (String customType : diff.getCustomMetadataDeleted()) { + metadataBuilder.removeCustom(customType); } - } catch (IOException e) { - throw new IllegalStateException( - String.format(Locale.ROOT, "Error while downloading Global Metadata - %s", globalMetadataFileName), - e - ); } - } - /** - * Fetch latest ClusterMetadataManifest from remote state store - * - * @param clusterUUID uuid of cluster state to refer to in remote - * @param clusterName name of the cluster - * @return ClusterMetadataManifest - */ - public Optional getLatestClusterMetadataManifest(String clusterName, String clusterUUID) { - Optional latestManifestFileName = getLatestManifestFileName(clusterName, clusterUUID); - return latestManifestFileName.map(s -> fetchRemoteClusterMetadataManifest(clusterName, clusterUUID, s)); + HashMap indexRoutingTables = new HashMap<>(updatedClusterState.getRoutingTable().getIndicesRouting()); + + for(String indexName: diff.getIndicesRoutingDeleted()){ + indexRoutingTables.remove(indexName); + } + + RoutingTable routingTable = new RoutingTable(manifest.getRoutingTableVersion(), indexRoutingTables); + + return clusterStateBuilder. + stateUUID(manifest.getStateUUID()). + version(manifest.getStateVersion()). + metadata(metadataBuilder). + routingTable(routingTable). + build(); + } /** @@ -890,7 +1133,10 @@ public Optional getLatestClusterMetadataManifest(String public String getLastKnownUUIDFromRemote(String clusterName) { try { Set clusterUUIDs = getAllClusterUUIDs(clusterName); - Map latestManifests = getLatestManifestForAllClusterUUIDs(clusterName, clusterUUIDs); + Map latestManifests = remoteManifestManager.getLatestManifestForAllClusterUUIDs( + clusterName, + clusterUUIDs + ); List validChain = createClusterChain(latestManifests, clusterName); if (validChain.isEmpty()) { return ClusterState.UNKNOWN_UUID; @@ -904,28 +1150,28 @@ public String getLastKnownUUIDFromRemote(String clusterName) { } } - private Set getAllClusterUUIDs(String clusterName) throws IOException { - Map clusterUUIDMetadata = clusterUUIDContainer(clusterName).children(); - if (clusterUUIDMetadata == null) { - return Collections.emptySet(); + public void setRemoteClusterStateEnabled(TimeValue remoteStateReadTimeout) { + this.remoteStateReadTimeout = remoteStateReadTimeout; + } + + + public RemoteClusterStateCleanupManager getCleanupManager() { + return remoteClusterStateCleanupManager; + } + + private BlobStoreTransferService getBlobStoreTransferService() { + if (blobStoreTransferService == null) { + blobStoreTransferService = new BlobStoreTransferService(getBlobStore(), threadpool); } - return Collections.unmodifiableSet(clusterUUIDMetadata.keySet()); + return blobStoreTransferService; } - private Map getLatestManifestForAllClusterUUIDs(String clusterName, Set clusterUUIDs) { - Map manifestsByClusterUUID = new HashMap<>(); - for (String clusterUUID : clusterUUIDs) { - try { - Optional manifest = getLatestClusterMetadataManifest(clusterName, clusterUUID); - manifest.ifPresent(clusterMetadataManifest -> manifestsByClusterUUID.put(clusterUUID, clusterMetadataManifest)); - } catch (Exception e) { - throw new IllegalStateException( - String.format(Locale.ROOT, "Exception in fetching manifest for clusterUUID: %s", clusterUUID), - e - ); - } + Set getAllClusterUUIDs(String clusterName) throws IOException { + Map clusterUUIDMetadata = clusterUUIDContainer(blobStoreRepository, clusterName).children(); + if (clusterUUIDMetadata == null) { + return Collections.emptySet(); } - return manifestsByClusterUUID; + return Collections.unmodifiableSet(clusterUUIDMetadata.keySet()); } /** @@ -1005,7 +1251,7 @@ private Map trimClusterUUIDs( if (!ClusterState.UNKNOWN_UUID.equals(currentManifest.getPreviousClusterUUID())) { ClusterMetadataManifest previousManifest = trimmedUUIDs.get(currentManifest.getPreviousClusterUUID()); if (isMetadataEqual(currentManifest, previousManifest, clusterName) - && isGlobalMetadataEqual(currentManifest, previousManifest, clusterName)) { + && remoteGlobalMetadataManager.isGlobalMetadataEqual(currentManifest, previousManifest, clusterName)) { trimmedUUIDs.remove(clusterUUID); } } @@ -1020,14 +1266,22 @@ private boolean isMetadataEqual(ClusterMetadataManifest first, ClusterMetadataMa } final Map secondIndices = second.getIndices() .stream() - .collect(Collectors.toMap(md -> md.getIndexName(), Function.identity())); + .collect(Collectors.toMap(UploadedIndexMetadata::getIndexName, Function.identity())); for (UploadedIndexMetadata uploadedIndexMetadata : first.getIndices()) { - final IndexMetadata firstIndexMetadata = getIndexMetadata(clusterName, first.getClusterUUID(), uploadedIndexMetadata); + final IndexMetadata firstIndexMetadata = remoteIndexMetadataManager.getIndexMetadata( + uploadedIndexMetadata, + first.getClusterUUID(), + first.getCodecVersion() + ); final UploadedIndexMetadata secondUploadedIndexMetadata = secondIndices.get(uploadedIndexMetadata.getIndexName()); if (secondUploadedIndexMetadata == null) { return false; } - final IndexMetadata secondIndexMetadata = getIndexMetadata(clusterName, second.getClusterUUID(), secondUploadedIndexMetadata); + final IndexMetadata secondIndexMetadata = remoteIndexMetadataManager.getIndexMetadata( + secondUploadedIndexMetadata, + second.getClusterUUID(), + second.getCodecVersion() + ); if (firstIndexMetadata.equals(secondIndexMetadata) == false) { return false; } @@ -1035,105 +1289,10 @@ private boolean isMetadataEqual(ClusterMetadataManifest first, ClusterMetadataMa return true; } - private boolean isGlobalMetadataEqual(ClusterMetadataManifest first, ClusterMetadataManifest second, String clusterName) { - Metadata secondGlobalMetadata = getGlobalMetadata(clusterName, second.getClusterUUID(), second); - Metadata firstGlobalMetadata = getGlobalMetadata(clusterName, first.getClusterUUID(), first); - return Metadata.isGlobalResourcesMetadataEquals(firstGlobalMetadata, secondGlobalMetadata); - } - private boolean isValidClusterUUID(ClusterMetadataManifest manifest) { return manifest.isClusterUUIDCommitted(); } - /** - * Fetch ClusterMetadataManifest files from remote state store in order - * - * @param clusterUUID uuid of cluster state to refer to in remote - * @param clusterName name of the cluster - * @param limit max no of files to fetch - * @return all manifest file names - */ - private List getManifestFileNames(String clusterName, String clusterUUID, int limit) throws IllegalStateException { - try { - - /* - {@link BlobContainer#listBlobsByPrefixInSortedOrder} will list the latest manifest file first - as the manifest file name generated via {@link RemoteClusterStateService#getManifestFileName} ensures - when sorted in LEXICOGRAPHIC order the latest uploaded manifest file comes on top. - */ - return manifestContainer(clusterName, clusterUUID).listBlobsByPrefixInSortedOrder( - MANIFEST_FILE_PREFIX + DELIMITER, - limit, - BlobContainer.BlobNameSortOrder.LEXICOGRAPHIC - ); - } catch (IOException e) { - throw new IllegalStateException("Error while fetching latest manifest file for remote cluster state", e); - } - } - - /** - * Fetch latest ClusterMetadataManifest file from remote state store - * - * @param clusterUUID uuid of cluster state to refer to in remote - * @param clusterName name of the cluster - * @return latest ClusterMetadataManifest filename - */ - private Optional getLatestManifestFileName(String clusterName, String clusterUUID) throws IllegalStateException { - List manifestFilesMetadata = getManifestFileNames(clusterName, clusterUUID, 1); - if (manifestFilesMetadata != null && !manifestFilesMetadata.isEmpty()) { - return Optional.of(manifestFilesMetadata.get(0).name()); - } - logger.info("No manifest file present in remote store for cluster name: {}, cluster UUID: {}", clusterName, clusterUUID); - return Optional.empty(); - } - - /** - * Fetch ClusterMetadataManifest from remote state store - * - * @param clusterUUID uuid of cluster state to refer to in remote - * @param clusterName name of the cluster - * @return ClusterMetadataManifest - */ - private ClusterMetadataManifest fetchRemoteClusterMetadataManifest(String clusterName, String clusterUUID, String filename) - throws IllegalStateException { - try { - return getClusterMetadataManifestBlobStoreFormat(filename).read( - manifestContainer(clusterName, clusterUUID), - filename, - blobStoreRepository.getNamedXContentRegistry() - ); - } catch (IOException e) { - throw new IllegalStateException(String.format(Locale.ROOT, "Error while downloading cluster metadata - %s", filename), e); - } - } - - private ChecksumBlobStoreFormat getClusterMetadataManifestBlobStoreFormat(String fileName) { - long codecVersion = getManifestCodecVersion(fileName); - if (codecVersion == MANIFEST_CURRENT_CODEC_VERSION) { - return CLUSTER_METADATA_MANIFEST_FORMAT; - } else if (codecVersion == ClusterMetadataManifest.CODEC_V0) { - return CLUSTER_METADATA_MANIFEST_FORMAT_V0; - } - - throw new IllegalArgumentException("Cluster metadata manifest file is corrupted, don't have valid codec version"); - } - - private int getManifestCodecVersion(String fileName) { - String[] splitName = fileName.split(DELIMITER); - if (splitName.length == SPLITED_MANIFEST_FILE_LENGTH) { - return Integer.parseInt(splitName[splitName.length - 1]); // Last value would be codec version. - } else if (splitName.length < SPLITED_MANIFEST_FILE_LENGTH) { // Where codec is not part of file name, i.e. default codec version 0 - // is used. - return ClusterMetadataManifest.CODEC_V0; - } else { - throw new IllegalArgumentException("Manifest file name is corrupted"); - } - } - - public static String encodeString(String content) { - return Base64.getUrlEncoder().withoutPadding().encodeToString(content.getBytes(StandardCharsets.UTF_8)); - } - public void writeMetadataFailed() { getStats().stateFailed(); } @@ -1141,7 +1300,7 @@ public void writeMetadataFailed() { /** * Exception for Remote state transfer. */ - static class RemoteStateTransferException extends RuntimeException { + public static class RemoteStateTransferException extends RuntimeException { public RemoteStateTransferException(String errorDesc) { super(errorDesc); @@ -1152,183 +1311,6 @@ public RemoteStateTransferException(String errorDesc, Throwable cause) { } } - /** - * Purges all remote cluster state against provided cluster UUIDs - * - * @param clusterName name of the cluster - * @param clusterUUIDs clusteUUIDs for which the remote state needs to be purged - */ - void deleteStaleUUIDsClusterMetadata(String clusterName, List clusterUUIDs) { - clusterUUIDs.forEach(clusterUUID -> { - getBlobStoreTransferService().deleteAsync( - ThreadPool.Names.REMOTE_PURGE, - getCusterMetadataBasePath(clusterName, clusterUUID), - new ActionListener<>() { - @Override - public void onResponse(Void unused) { - logger.info("Deleted all remote cluster metadata for cluster UUID - {}", clusterUUID); - } - - @Override - public void onFailure(Exception e) { - logger.error( - new ParameterizedMessage( - "Exception occurred while deleting all remote cluster metadata for cluster UUID {}", - clusterUUID - ), - e - ); - remoteStateStats.cleanUpAttemptFailed(); - } - } - ); - }); - } - - /** - * Deletes older than last {@code versionsToRetain} manifests. Also cleans up unreferenced IndexMetadata associated with older manifests - * - * @param clusterName name of the cluster - * @param clusterUUID uuid of cluster state to refer to in remote - * @param manifestsToRetain no of latest manifest files to keep in remote - */ - // package private for testing - void deleteStaleClusterMetadata(String clusterName, String clusterUUID, int manifestsToRetain) { - if (deleteStaleMetadataRunning.compareAndSet(false, true) == false) { - logger.info("Delete stale cluster metadata task is already in progress."); - return; - } - try { - getBlobStoreTransferService().listAllInSortedOrderAsync( - ThreadPool.Names.REMOTE_PURGE, - getManifestFolderPath(clusterName, clusterUUID), - "manifest", - Integer.MAX_VALUE, - new ActionListener<>() { - @Override - public void onResponse(List blobMetadata) { - if (blobMetadata.size() > manifestsToRetain) { - deleteClusterMetadata( - clusterName, - clusterUUID, - blobMetadata.subList(0, manifestsToRetain - 1), - blobMetadata.subList(manifestsToRetain - 1, blobMetadata.size()) - ); - } - deleteStaleMetadataRunning.set(false); - } - - @Override - public void onFailure(Exception e) { - logger.error( - new ParameterizedMessage( - "Exception occurred while deleting Remote Cluster Metadata for clusterUUIDs {}", - clusterUUID - ) - ); - deleteStaleMetadataRunning.set(false); - } - } - ); - } catch (Exception e) { - deleteStaleMetadataRunning.set(false); - throw e; - } - } - - private void deleteClusterMetadata( - String clusterName, - String clusterUUID, - List activeManifestBlobMetadata, - List staleManifestBlobMetadata - ) { - try { - Set filesToKeep = new HashSet<>(); - Set staleManifestPaths = new HashSet<>(); - Set staleIndexMetadataPaths = new HashSet<>(); - Set staleGlobalMetadataPaths = new HashSet<>(); - activeManifestBlobMetadata.forEach(blobMetadata -> { - ClusterMetadataManifest clusterMetadataManifest = fetchRemoteClusterMetadataManifest( - clusterName, - clusterUUID, - blobMetadata.name() - ); - clusterMetadataManifest.getIndices() - .forEach(uploadedIndexMetadata -> filesToKeep.add(uploadedIndexMetadata.getUploadedFilename())); - filesToKeep.add(clusterMetadataManifest.getGlobalMetadataFileName()); - }); - staleManifestBlobMetadata.forEach(blobMetadata -> { - ClusterMetadataManifest clusterMetadataManifest = fetchRemoteClusterMetadataManifest( - clusterName, - clusterUUID, - blobMetadata.name() - ); - staleManifestPaths.add(new BlobPath().add(MANIFEST_PATH_TOKEN).buildAsString() + blobMetadata.name()); - if (filesToKeep.contains(clusterMetadataManifest.getGlobalMetadataFileName()) == false) { - String[] globalMetadataSplitPath = clusterMetadataManifest.getGlobalMetadataFileName().split("/"); - staleGlobalMetadataPaths.add( - new BlobPath().add(GLOBAL_METADATA_PATH_TOKEN).buildAsString() + GLOBAL_METADATA_FORMAT.blobName( - globalMetadataSplitPath[globalMetadataSplitPath.length - 1] - ) - ); - } - clusterMetadataManifest.getIndices().forEach(uploadedIndexMetadata -> { - if (filesToKeep.contains(uploadedIndexMetadata.getUploadedFilename()) == false) { - staleIndexMetadataPaths.add( - new BlobPath().add(INDEX_PATH_TOKEN).add(uploadedIndexMetadata.getIndexUUID()).buildAsString() - + INDEX_METADATA_FORMAT.blobName(uploadedIndexMetadata.getUploadedFilename()) - ); - } - }); - }); - - if (staleManifestPaths.isEmpty()) { - logger.debug("No stale Remote Cluster Metadata files found"); - return; - } - - deleteStalePaths(clusterName, clusterUUID, new ArrayList<>(staleGlobalMetadataPaths)); - deleteStalePaths(clusterName, clusterUUID, new ArrayList<>(staleIndexMetadataPaths)); - deleteStalePaths(clusterName, clusterUUID, new ArrayList<>(staleManifestPaths)); - } catch (IllegalStateException e) { - logger.error("Error while fetching Remote Cluster Metadata manifests", e); - } catch (IOException e) { - logger.error("Error while deleting stale Remote Cluster Metadata files", e); - remoteStateStats.cleanUpAttemptFailed(); - } catch (Exception e) { - logger.error("Unexpected error while deleting stale Remote Cluster Metadata files", e); - remoteStateStats.cleanUpAttemptFailed(); - } - } - - private void deleteStalePaths(String clusterName, String clusterUUID, List stalePaths) throws IOException { - logger.debug(String.format(Locale.ROOT, "Deleting stale files from remote - %s", stalePaths)); - getBlobStoreTransferService().deleteBlobs(getCusterMetadataBasePath(clusterName, clusterUUID), stalePaths); - } - - /** - * Purges all remote cluster state against provided cluster UUIDs - * @param clusterState current state of the cluster - * @param committedManifest last committed ClusterMetadataManifest - */ - public void deleteStaleClusterUUIDs(ClusterState clusterState, ClusterMetadataManifest committedManifest) { - threadpool.executor(ThreadPool.Names.REMOTE_PURGE).execute(() -> { - String clusterName = clusterState.getClusterName().value(); - logger.debug("Deleting stale cluster UUIDs data from remote [{}]", clusterName); - Set allClustersUUIDsInRemote; - try { - allClustersUUIDsInRemote = new HashSet<>(getAllClusterUUIDs(clusterState.getClusterName().value())); - } catch (IOException e) { - logger.info(String.format(Locale.ROOT, "Error while fetching all cluster UUIDs for [%s]", clusterName)); - return; - } - // Retain last 2 cluster uuids data - allClustersUUIDsInRemote.remove(committedManifest.getClusterUUID()); - allClustersUUIDsInRemote.remove(committedManifest.getPreviousClusterUUID()); - deleteStaleUUIDsClusterMetadata(clusterName, new ArrayList<>(allClustersUUIDsInRemote)); - }); - } - public RemotePersistenceStats getStats() { return remoteStateStats; } diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateUtils.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateUtils.java new file mode 100644 index 0000000000000..a383291637a9e --- /dev/null +++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateUtils.java @@ -0,0 +1,116 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.gateway.remote; + +import java.util.Locale; +import org.opensearch.cluster.metadata.Metadata; +import org.opensearch.common.blobstore.BlobContainer; +import org.opensearch.common.blobstore.BlobPath; +import org.opensearch.core.xcontent.ToXContent; +import org.opensearch.repositories.blobstore.BlobStoreRepository; + +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Base64; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +public class RemoteClusterStateUtils { + public static final String METADATA_NAME_FORMAT = "%s.dat"; + public static final String METADATA_NAME_PLAIN_FORMAT = "%s"; + public static final String METADATA_FILE_PREFIX = "metadata"; + public static final String CLUSTER_STATE_PATH_TOKEN = "cluster-state"; + public static final String DELIMITER = "__"; + public static final String PATH_DELIMITER = "/"; + + // ToXContent Params with gateway mode. + // We are using gateway context mode to persist all custom metadata. + public static final ToXContent.Params FORMAT_PARAMS = new ToXContent.MapParams(Map.of(Metadata.CONTEXT_MODE_PARAM, Metadata.CONTEXT_MODE_GATEWAY)); + + public static BlobPath getCusterMetadataBasePath(BlobStoreRepository blobStoreRepository, String clusterName, String clusterUUID) { + return blobStoreRepository.basePath().add(encodeString(clusterName)).add(CLUSTER_STATE_PATH_TOKEN).add(clusterUUID); + } + + public static String encodeString(String content) { + return Base64.getUrlEncoder().withoutPadding().encodeToString(content.getBytes(StandardCharsets.UTF_8)); + } + + public static String getFormattedFileName(String fileName, int codecVersion) { + if (codecVersion < ClusterMetadataManifest.CODEC_V3) { + return String.format(Locale.ROOT, METADATA_NAME_FORMAT, fileName); + } + return fileName; + } + + static BlobContainer clusterUUIDContainer(BlobStoreRepository blobStoreRepository, String clusterName) { + return blobStoreRepository.blobStore() + .blobContainer( + blobStoreRepository.basePath() + .add(Base64.getUrlEncoder().withoutPadding().encodeToString(clusterName.getBytes(StandardCharsets.UTF_8))) + .add(CLUSTER_STATE_PATH_TOKEN) + ); + } + + /** + * Exception for Remote state transfer. + */ + public static class RemoteStateTransferException extends RuntimeException { + + public RemoteStateTransferException(String errorDesc) { + super(errorDesc); + } + + public RemoteStateTransferException(String errorDesc, Throwable cause) { + super(errorDesc, cause); + } + } + + public static class UploadedMetadataResults { + List uploadedIndexMetadata; + Map uploadedCustomMetadataMap; + ClusterMetadataManifest.UploadedMetadataAttribute uploadedCoordinationMetadata; + ClusterMetadataManifest.UploadedMetadataAttribute uploadedSettingsMetadata; + ClusterMetadataManifest.UploadedMetadataAttribute uploadedTemplatesMetadata; + ClusterMetadataManifest.UploadedMetadataAttribute uploadedDiscoveryNodes; + ClusterMetadataManifest.UploadedMetadataAttribute uploadedClusterBlocks; + List uploadedIndicesRoutingMetadata; + + public UploadedMetadataResults( + List uploadedIndexMetadata, + Map uploadedCustomMetadataMap, + ClusterMetadataManifest.UploadedMetadataAttribute uploadedCoordinationMetadata, + ClusterMetadataManifest.UploadedMetadataAttribute uploadedSettingsMetadata, + ClusterMetadataManifest.UploadedMetadataAttribute uploadedTemplatesMetadata, + ClusterMetadataManifest.UploadedMetadataAttribute uploadedDiscoveryNodes, + ClusterMetadataManifest.UploadedMetadataAttribute uploadedClusterBlocks, + List uploadedIndicesRoutingMetadata + ) { + this.uploadedIndexMetadata = uploadedIndexMetadata; + this.uploadedCustomMetadataMap = uploadedCustomMetadataMap; + this.uploadedCoordinationMetadata = uploadedCoordinationMetadata; + this.uploadedSettingsMetadata = uploadedSettingsMetadata; + this.uploadedTemplatesMetadata = uploadedTemplatesMetadata; + this.uploadedDiscoveryNodes = uploadedDiscoveryNodes; + this.uploadedClusterBlocks = uploadedClusterBlocks; + this.uploadedIndicesRoutingMetadata = uploadedIndicesRoutingMetadata; + } + + public UploadedMetadataResults() { + this.uploadedIndexMetadata = new ArrayList<>(); + this.uploadedCustomMetadataMap = new HashMap<>(); + this.uploadedCoordinationMetadata = null; + this.uploadedSettingsMetadata = null; + this.uploadedTemplatesMetadata = null; + this.uploadedDiscoveryNodes = null; + this.uploadedClusterBlocks = null; + this.uploadedIndicesRoutingMetadata = new ArrayList<>(); + } + } +} diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteCoordinationMetadata.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteCoordinationMetadata.java new file mode 100644 index 0000000000000..d3d53aab9a4b8 --- /dev/null +++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteCoordinationMetadata.java @@ -0,0 +1,107 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.gateway.remote; + +import static org.opensearch.gateway.remote.RemoteClusterStateUtils.DELIMITER; +import static org.opensearch.gateway.remote.RemoteClusterStateUtils.METADATA_NAME_FORMAT; +import static org.opensearch.gateway.remote.RemoteGlobalMetadataManager.GLOBAL_METADATA_CURRENT_CODEC_VERSION; + +import java.io.IOException; +import java.io.InputStream; +import java.util.List; +import org.opensearch.cluster.coordination.CoordinationMetadata; +import org.opensearch.common.io.Streams; +import org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedMetadata; +import org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedMetadataAttribute; +import org.opensearch.index.remote.RemoteStoreUtils; +import org.opensearch.index.translog.transfer.BlobStoreTransferService; +import org.opensearch.repositories.blobstore.BlobStoreRepository; +import org.opensearch.repositories.blobstore.ChecksumBlobStoreFormat; +import org.opensearch.threadpool.ThreadPool; + +public class RemoteCoordinationMetadata extends AbstractRemoteBlobStoreObject { + + public static final String COORDINATION_METADATA = "coordination"; + public static final ChecksumBlobStoreFormat COORDINATION_METADATA_FORMAT = new ChecksumBlobStoreFormat<>( + "coordination", + METADATA_NAME_FORMAT, + CoordinationMetadata::fromXContent + ); + + private CoordinationMetadata coordinationMetadata; + private long metadataVersion; + private String blobName; + private final String clusterUUID; + + public RemoteCoordinationMetadata(CoordinationMetadata coordinationMetadata, long metadataVersion, String clusterUUID, BlobStoreTransferService blobStoreTransferService, BlobStoreRepository blobStoreRepository, String clusterName, + ThreadPool threadPool) { + super(blobStoreTransferService, blobStoreRepository, clusterName, threadPool); + this.coordinationMetadata = coordinationMetadata; + this.metadataVersion = metadataVersion; + this.clusterUUID = clusterUUID; + } + + public RemoteCoordinationMetadata(String blobName, String clusterUUID, BlobStoreTransferService blobStoreTransferService, BlobStoreRepository blobStoreRepository, String clusterName, + ThreadPool threadPool) { + super(blobStoreTransferService, blobStoreRepository, clusterName, threadPool); + this.blobName = blobName; + this.clusterUUID = clusterUUID; + } + + @Override + public BlobPathParameters getBlobPathParameters() { + return new BlobPathParameters(List.of("global-metadata"), COORDINATION_METADATA); + } + + @Override + public String getFullBlobName() { + return blobName; + } + + @Override + public String generateBlobFileName() { + // 123456789012_test-cluster/cluster-state/dsgYj10Nkso7/global-metadata/______ + String blobFileName = String.join( + DELIMITER, + getBlobPathParameters().getFilePrefix(), + RemoteStoreUtils.invertLong(metadataVersion), + RemoteStoreUtils.invertLong(System.currentTimeMillis()), + String.valueOf(GLOBAL_METADATA_CURRENT_CODEC_VERSION) + ); + // setting the full blob path with name for future access + this.blobName = getBlobPathForUpload().buildAsString() + blobFileName; + return blobFileName; + } + + @Override + public CoordinationMetadata get() { + return coordinationMetadata; + } + + @Override + public String clusterUUID() { + return clusterUUID; + } + + @Override + public InputStream serialize() throws IOException { + return COORDINATION_METADATA_FORMAT.serialize(coordinationMetadata, generateBlobFileName(), getCompressor(), RemoteClusterStateUtils.FORMAT_PARAMS).streamInput(); + } + + @Override + public CoordinationMetadata deserialize(InputStream inputStream) throws IOException { + return COORDINATION_METADATA_FORMAT.deserialize(blobName, getBlobStoreRepository().getNamedXContentRegistry(), Streams.readFully(inputStream)); + } + + @Override + public UploadedMetadata getUploadedMetadata() { + assert blobName != null; + return new UploadedMetadataAttribute(COORDINATION_METADATA, blobName); + } +} diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteCustomMetadata.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteCustomMetadata.java new file mode 100644 index 0000000000000..f70a99f2fa21c --- /dev/null +++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteCustomMetadata.java @@ -0,0 +1,128 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.gateway.remote; + +import static org.opensearch.gateway.remote.RemoteClusterStateUtils.DELIMITER; +import static org.opensearch.gateway.remote.RemoteClusterStateUtils.METADATA_NAME_FORMAT; +import static org.opensearch.gateway.remote.RemoteGlobalMetadataManager.GLOBAL_METADATA_CURRENT_CODEC_VERSION; + +import java.io.IOException; +import java.io.InputStream; +import java.util.List; +import org.opensearch.cluster.metadata.Metadata; +import org.opensearch.cluster.metadata.Metadata.Custom; +import org.opensearch.common.io.Streams; +import org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedMetadata; +import org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedMetadataAttribute; +import org.opensearch.index.remote.RemoteStoreUtils; +import org.opensearch.index.translog.transfer.BlobStoreTransferService; +import org.opensearch.repositories.blobstore.BlobStoreRepository; +import org.opensearch.repositories.blobstore.ChecksumBlobStoreFormat; +import org.opensearch.threadpool.ThreadPool; + +public class RemoteCustomMetadata extends AbstractRemoteBlobStoreObject { + + public static final String CUSTOM_METADATA = "custom"; + public static final String CUSTOM_DELIMITER = "--"; + public static final ChecksumBlobStoreFormat CUSTOM_METADATA_FORMAT = new ChecksumBlobStoreFormat<>( + "custom", + METADATA_NAME_FORMAT, + Metadata.Custom::fromXContent + ); + + public final ChecksumBlobStoreFormat customBlobStoreFormat; + + private Custom custom; + private final String customType; + private long metadataVersion; + private String blobName; + private final String clusterUUID; + + public RemoteCustomMetadata(Custom custom, String customType, long metadataVersion, String clusterUUID, BlobStoreTransferService blobStoreTransferService, + BlobStoreRepository blobStoreRepository, String clusterName, + ThreadPool threadPool) { + super(blobStoreTransferService, blobStoreRepository, clusterName, threadPool); + this.custom = custom; + this.customType = customType; + this.metadataVersion = metadataVersion; + this.clusterUUID = clusterUUID; + this.customBlobStoreFormat = new ChecksumBlobStoreFormat<>( + "custom", + METADATA_NAME_FORMAT, + (parser -> Metadata.Custom.fromXContent(parser, customType)) + ); + } + + public RemoteCustomMetadata(String blobName, String customType, String clusterUUID, BlobStoreTransferService blobStoreTransferService, + BlobStoreRepository blobStoreRepository, String clusterName, + ThreadPool threadPool) { + super(blobStoreTransferService, blobStoreRepository, clusterName, threadPool); + this.blobName = blobName; + this.customType = customType; + this.clusterUUID = clusterUUID; + this.customBlobStoreFormat = new ChecksumBlobStoreFormat<>( + "custom", + METADATA_NAME_FORMAT, + (parser -> Metadata.Custom.fromXContent(parser, customType)) + ); + } + + @Override + public BlobPathParameters getBlobPathParameters() { + String prefix = String.join(CUSTOM_DELIMITER, CUSTOM_METADATA, customType); + return new BlobPathParameters(List.of("global-metadata"), prefix); + } + + @Override + public String getFullBlobName() { + return blobName; + } + + @Override + public String generateBlobFileName() { + // 123456789012_test-cluster/cluster-state/dsgYj10Nkso7/global-metadata/______ + // + String blobFileName = String.join( + DELIMITER, + getBlobPathParameters().getFilePrefix(), + RemoteStoreUtils.invertLong(metadataVersion), + RemoteStoreUtils.invertLong(System.currentTimeMillis()), + String.valueOf(GLOBAL_METADATA_CURRENT_CODEC_VERSION) + ); + // setting the full blob path with name for future access + this.blobName = getBlobPathForUpload().buildAsString() + blobFileName; + return blobFileName; + } + + @Override + public Custom get() { + return custom; + } + + @Override + public String clusterUUID() { + return clusterUUID; + } + + @Override + public InputStream serialize() throws IOException { + return customBlobStoreFormat.serialize(custom, generateBlobFileName(), getCompressor(), RemoteClusterStateUtils.FORMAT_PARAMS).streamInput(); + } + + @Override + public Custom deserialize(InputStream inputStream) throws IOException { + return customBlobStoreFormat.deserialize(blobName, getBlobStoreRepository().getNamedXContentRegistry(), Streams.readFully(inputStream)); + } + + @Override + public UploadedMetadata getUploadedMetadata() { + assert blobName != null; + return new UploadedMetadataAttribute(String.join(CUSTOM_DELIMITER, CUSTOM_METADATA, customType), blobName); + } +} diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteDiscoveryNodes.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteDiscoveryNodes.java new file mode 100644 index 0000000000000..cfa74f683ac4f --- /dev/null +++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteDiscoveryNodes.java @@ -0,0 +1,107 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.gateway.remote; + +import static org.opensearch.gateway.remote.RemoteClusterStateAttributesManager.CLUSTER_STATE_ATTRIBUTES_CURRENT_CODEC_VERSION; +import static org.opensearch.gateway.remote.RemoteClusterStateUtils.DELIMITER; +import static org.opensearch.gateway.remote.RemoteClusterStateUtils.METADATA_NAME_FORMAT; + +import java.io.IOException; +import java.io.InputStream; +import java.util.List; +import org.opensearch.cluster.node.DiscoveryNodes; +import org.opensearch.common.io.Streams; +import org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedMetadata; +import org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedMetadataAttribute; +import org.opensearch.index.remote.RemoteStoreUtils; +import org.opensearch.index.translog.transfer.BlobStoreTransferService; +import org.opensearch.repositories.blobstore.BlobStoreRepository; +import org.opensearch.repositories.blobstore.ChecksumBlobStoreFormat; +import org.opensearch.threadpool.ThreadPool; + +public class RemoteDiscoveryNodes extends AbstractRemoteBlobStoreObject { + + public static final String DISCOVERY_NODES = "nodes"; + public static final ChecksumBlobStoreFormat DISCOVERY_NODES_FORMAT = new ChecksumBlobStoreFormat<>( + "nodes", + METADATA_NAME_FORMAT, + DiscoveryNodes::fromXContent + ); + + private DiscoveryNodes discoveryNodes; + private long stateVersion; + private String blobName; + private final String clusterUUID; + + public RemoteDiscoveryNodes(DiscoveryNodes discoveryNodes, long stateVersion, String clusterUUID, BlobStoreTransferService blobStoreTransferService, BlobStoreRepository blobStoreRepository, String clusterName, + ThreadPool threadPool) { + super(blobStoreTransferService, blobStoreRepository, clusterName, threadPool); + this.discoveryNodes = discoveryNodes; + this.stateVersion = stateVersion; + this.clusterUUID = clusterUUID; + } + + public RemoteDiscoveryNodes(String blobName, String clusterUUID, BlobStoreTransferService blobStoreTransferService, BlobStoreRepository blobStoreRepository, String clusterName, + ThreadPool threadPool) { + super(blobStoreTransferService, blobStoreRepository, clusterName, threadPool); + this.blobName = blobName; + this.clusterUUID = clusterUUID; + } + + @Override + public BlobPathParameters getBlobPathParameters() { + return new BlobPathParameters(List.of("transient"), DISCOVERY_NODES); + } + + @Override + public String getFullBlobName() { + return blobName; + } + + @Override + public String generateBlobFileName() { + // 123456789012_test-cluster/cluster-state/dsgYj10Nkso7/transient/______ + String blobFileName = String.join( + DELIMITER, + getBlobPathParameters().getFilePrefix(), + RemoteStoreUtils.invertLong(stateVersion), + RemoteStoreUtils.invertLong(System.currentTimeMillis()), + String.valueOf(CLUSTER_STATE_ATTRIBUTES_CURRENT_CODEC_VERSION) + ); + // setting the full blob path with name for future access + this.blobName = getBlobPathForUpload().buildAsString() + blobFileName; + return blobFileName; + } + + @Override + public UploadedMetadata getUploadedMetadata() { + assert blobName != null; + return new UploadedMetadataAttribute(DISCOVERY_NODES, blobName); + } + + @Override + public DiscoveryNodes get() { + return discoveryNodes; + } + + @Override + public String clusterUUID() { + return clusterUUID; + } + + @Override + public InputStream serialize() throws IOException { + return DISCOVERY_NODES_FORMAT.serialize(discoveryNodes, generateBlobFileName(), getCompressor(), RemoteClusterStateUtils.FORMAT_PARAMS).streamInput(); + } + + @Override + public DiscoveryNodes deserialize(InputStream inputStream) throws IOException { + return DISCOVERY_NODES_FORMAT.deserialize(blobName, getBlobStoreRepository().getNamedXContentRegistry(), Streams.readFully(inputStream)); + } +} diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteGlobalMetadataManager.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteGlobalMetadataManager.java new file mode 100644 index 0000000000000..96aa8dc8bdaf6 --- /dev/null +++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteGlobalMetadataManager.java @@ -0,0 +1,326 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.gateway.remote; + +import static java.util.Objects.requireNonNull; +import static org.opensearch.gateway.remote.RemoteClusterStateUtils.METADATA_NAME_FORMAT; +import static org.opensearch.gateway.remote.RemoteClusterStateUtils.RemoteStateTransferException; +import static org.opensearch.gateway.remote.RemoteClusterStateUtils.getCusterMetadataBasePath; + +import java.io.IOException; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Locale; +import java.util.Map; +import java.util.Set; +import org.opensearch.action.LatchedActionListener; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.coordination.CoordinationMetadata; +import org.opensearch.cluster.metadata.Metadata; +import org.opensearch.cluster.metadata.Metadata.Custom; +import org.opensearch.cluster.metadata.TemplatesMetadata; +import org.opensearch.common.CheckedRunnable; +import org.opensearch.common.blobstore.BlobContainer; +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Setting; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.core.action.ActionListener; +import org.opensearch.core.xcontent.ToXContent; +import org.opensearch.index.translog.transfer.BlobStoreTransferService; +import org.opensearch.repositories.blobstore.BlobStoreRepository; +import org.opensearch.repositories.blobstore.ChecksumBlobStoreFormat; +import org.opensearch.threadpool.ThreadPool; + +public class RemoteGlobalMetadataManager { + + public static final String GLOBAL_METADATA_PATH_TOKEN = "global-metadata"; + public static final String COORDINATION_METADATA = "coordination"; + public static final String SETTING_METADATA = "settings"; + public static final String TEMPLATES_METADATA = "templates"; + public static final String CUSTOM_METADATA = "custom"; + public static final String CUSTOM_DELIMITER = "--"; + + public static final TimeValue GLOBAL_METADATA_UPLOAD_TIMEOUT_DEFAULT = TimeValue.timeValueMillis(20000); + + public static final Setting GLOBAL_METADATA_UPLOAD_TIMEOUT_SETTING = Setting.timeSetting( + "cluster.remote_store.state.global_metadata.upload_timeout", + GLOBAL_METADATA_UPLOAD_TIMEOUT_DEFAULT, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + public static final ChecksumBlobStoreFormat GLOBAL_METADATA_FORMAT = new ChecksumBlobStoreFormat<>( + "metadata", + METADATA_NAME_FORMAT, + Metadata::fromXContent + ); + + public static final ChecksumBlobStoreFormat COORDINATION_METADATA_FORMAT = new ChecksumBlobStoreFormat<>( + "coordination", + METADATA_NAME_FORMAT, + CoordinationMetadata::fromXContent + ); + + public static final ChecksumBlobStoreFormat SETTINGS_METADATA_FORMAT = new ChecksumBlobStoreFormat<>( + "settings", + METADATA_NAME_FORMAT, + Settings::fromXContent + ); + + public static final ChecksumBlobStoreFormat TEMPLATES_METADATA_FORMAT = new ChecksumBlobStoreFormat<>( + "templates", + METADATA_NAME_FORMAT, + TemplatesMetadata::fromXContent + ); + + public static final int GLOBAL_METADATA_CURRENT_CODEC_VERSION = 1; + + private final BlobStoreRepository blobStoreRepository; + private final ThreadPool threadPool; + + private volatile TimeValue globalMetadataUploadTimeout; + private final BlobStoreTransferService blobStoreTransferService; + private final String clusterName; + + RemoteGlobalMetadataManager(BlobStoreRepository blobStoreRepository, ClusterSettings clusterSettings, ThreadPool threadPool, BlobStoreTransferService blobStoreTransferService, + String clusterName) { + this.blobStoreRepository = blobStoreRepository; + this.globalMetadataUploadTimeout = clusterSettings.get(GLOBAL_METADATA_UPLOAD_TIMEOUT_SETTING); + this.threadPool = threadPool; + this.blobStoreTransferService = blobStoreTransferService; + this.clusterName = clusterName; + clusterSettings.addSettingsUpdateConsumer(GLOBAL_METADATA_UPLOAD_TIMEOUT_SETTING, this::setGlobalMetadataUploadTimeout); + } + + /** + * Allows async upload of Metadata components to remote + */ + CheckedRunnable getAsyncMetadataWriteAction( + Object objectToUpload, + long metadataVersion, + String clusterUUID, + LatchedActionListener latchedActionListener, + String customType + ) { + AbstractRemoteBlobStoreObject remoteBlobStoreObject = getRemoteObject(objectToUpload, metadataVersion, clusterUUID, customType); + ActionListener completionListener = ActionListener.wrap( + resp -> latchedActionListener.onResponse( + remoteBlobStoreObject.getUploadedMetadata() + ), + ex -> latchedActionListener.onFailure(new RemoteStateTransferException("Upload failed", ex)) + ); + return remoteBlobStoreObject.writeAsync(completionListener); + } + + CheckedRunnable getAsyncMetadataReadAction( + String clusterName, + String clusterUUID, + String component, + String componentName, + String uploadFilename, + LatchedActionListener listener + ) { + AbstractRemoteBlobStoreObject remoteBlobStoreObject; + if (component.equals(COORDINATION_METADATA)) { + remoteBlobStoreObject = new RemoteCoordinationMetadata(uploadFilename, clusterUUID, blobStoreTransferService, blobStoreRepository, clusterName, threadPool); + } else if (component.equals(TEMPLATES_METADATA)) { + remoteBlobStoreObject = new RemoteTemplatesMetadata(uploadFilename, clusterUUID, blobStoreTransferService, blobStoreRepository, clusterName, threadPool); + } else if (component.equals(SETTING_METADATA)) { + remoteBlobStoreObject = new RemotePersistentSettingsMetadata(uploadFilename, clusterUUID, blobStoreTransferService, blobStoreRepository, clusterName, threadPool); + } else if (component.equals(CUSTOM_METADATA)) { + remoteBlobStoreObject = new RemoteCustomMetadata(uploadFilename, componentName, clusterUUID, blobStoreTransferService, blobStoreRepository, clusterName, threadPool); + } else { + throw new RemoteStateTransferException("Unknown component " + componentName); + } + ActionListener actionListener = ActionListener.wrap(response -> listener.onResponse(new RemoteReadResult((ToXContent) response, component, componentName)), listener::onFailure); + return () -> remoteBlobStoreObject.readAsync(actionListener); + } + + Metadata getGlobalMetadata(String clusterName, String clusterUUID, ClusterMetadataManifest clusterMetadataManifest) { + String globalMetadataFileName = clusterMetadataManifest.getGlobalMetadataFileName(); + try { + // Fetch Global metadata + if (globalMetadataFileName != null) { + String[] splitPath = globalMetadataFileName.split("/"); + return GLOBAL_METADATA_FORMAT.read( + globalMetadataContainer(clusterName, clusterUUID), + splitPath[splitPath.length - 1], + blobStoreRepository.getNamedXContentRegistry() + ); + } else if (clusterMetadataManifest.hasMetadataAttributesFiles()) { + CoordinationMetadata coordinationMetadata = getCoordinationMetadata( + clusterUUID, + clusterMetadataManifest.getCoordinationMetadata().getUploadedFilename() + ); + Settings settingsMetadata = getSettingsMetadata( + clusterUUID, + clusterMetadataManifest.getSettingsMetadata().getUploadedFilename() + ); + TemplatesMetadata templatesMetadata = getTemplatesMetadata( + clusterUUID, + clusterMetadataManifest.getTemplatesMetadata().getUploadedFilename() + ); + Metadata.Builder builder = new Metadata.Builder(); + builder.coordinationMetadata(coordinationMetadata); + builder.persistentSettings(settingsMetadata); + builder.templates(templatesMetadata); + builder.clusterUUID(clusterMetadataManifest.getClusterUUID()); + builder.clusterUUIDCommitted(clusterMetadataManifest.isClusterUUIDCommitted()); + //todo add metadata version + //builder.version(clusterMetadataManifest.met) + clusterMetadataManifest.getCustomMetadataMap() + .forEach( + (key, value) -> builder.putCustom( + key, + getCustomsMetadata(clusterUUID, value.getUploadedFilename(), key) + ) + ); + return builder.build(); + } else { + return Metadata.EMPTY_METADATA; + } + } catch (IOException e) { + throw new IllegalStateException( + String.format(Locale.ROOT, "Error while downloading Global Metadata - %s", globalMetadataFileName), + e + ); + } + } + + public CoordinationMetadata getCoordinationMetadata(String clusterUUID, String coordinationMetadataFileName) { + try { + // Fetch Coordination metadata + if (coordinationMetadataFileName != null) { + RemoteCoordinationMetadata remoteCoordinationMetadata = new RemoteCoordinationMetadata(coordinationMetadataFileName, clusterUUID, + blobStoreTransferService, blobStoreRepository, clusterName, threadPool); + return remoteCoordinationMetadata.read(); + } else { + return CoordinationMetadata.EMPTY_METADATA; + } + } catch (IOException e) { + throw new IllegalStateException( + String.format(Locale.ROOT, "Error while downloading Coordination Metadata - %s", coordinationMetadataFileName), + e + ); + } + } + + public Settings getSettingsMetadata(String clusterUUID, String settingsMetadataFileName) { + try { + // Fetch Settings metadata + if (settingsMetadataFileName != null) { + RemotePersistentSettingsMetadata remotePersistentSettingsMetadata = new RemotePersistentSettingsMetadata(settingsMetadataFileName, clusterUUID, + blobStoreTransferService, blobStoreRepository, clusterName, threadPool); + return remotePersistentSettingsMetadata.read(); + } else { + return Settings.EMPTY; + } + } catch (IOException e) { + throw new IllegalStateException( + String.format(Locale.ROOT, "Error while downloading Settings Metadata - %s", settingsMetadataFileName), + e + ); + } + } + + public TemplatesMetadata getTemplatesMetadata(String clusterUUID, String templatesMetadataFileName) { + try { + // Fetch Templates metadata + if (templatesMetadataFileName != null) { + RemoteTemplatesMetadata remoteTemplatesMetadata = new RemoteTemplatesMetadata(templatesMetadataFileName, clusterUUID, + blobStoreTransferService, blobStoreRepository, clusterName, threadPool); + return remoteTemplatesMetadata.read(); + } else { + return TemplatesMetadata.EMPTY_METADATA; + } + } catch (IOException e) { + throw new IllegalStateException( + String.format(Locale.ROOT, "Error while downloading Templates Metadata - %s", templatesMetadataFileName), + e + ); + } + } + + public Metadata.Custom getCustomsMetadata(String clusterUUID, String customMetadataFileName, String custom) { + requireNonNull(customMetadataFileName); + try { + // Fetch Custom metadata + RemoteCustomMetadata remoteCustomMetadata = new RemoteCustomMetadata(customMetadataFileName, custom, clusterUUID, blobStoreTransferService, blobStoreRepository, clusterName, threadPool); + return remoteCustomMetadata.read(); + } catch (IOException e) { + throw new IllegalStateException( + String.format(Locale.ROOT, "Error while downloading Custom Metadata - %s", customMetadataFileName), + e + ); + } + } + + Map getUpdatedCustoms(ClusterState currentState, ClusterState previousState) { + if (Metadata.isCustomMetadataEqual(previousState.metadata(), currentState.metadata())) { + return new HashMap<>(); + } + Map updatedCustom = new HashMap<>(); + Set currentCustoms = new HashSet<>(currentState.metadata().customs().keySet()); + for (Map.Entry cursor : previousState.metadata().customs().entrySet()) { + if (cursor.getValue().context().contains(Metadata.XContentContext.GATEWAY)) { + if (currentCustoms.contains(cursor.getKey()) + && !cursor.getValue().equals(currentState.metadata().custom(cursor.getKey()))) { + // If the custom metadata is updated, we need to upload the new version. + updatedCustom.put(cursor.getKey(), currentState.metadata().custom(cursor.getKey())); + } + currentCustoms.remove(cursor.getKey()); + } + } + for (String custom : currentCustoms) { + Metadata.Custom cursor = currentState.metadata().custom(custom); + if (cursor.context().contains(Metadata.XContentContext.GATEWAY)) { + updatedCustom.put(custom, cursor); + } + } + return updatedCustom; + } + + private AbstractRemoteBlobStoreObject getRemoteObject(Object object, long metadataVersion, String clusterUUID, String customType) { + if (object instanceof CoordinationMetadata) { + return new RemoteCoordinationMetadata((CoordinationMetadata) object, metadataVersion, clusterUUID, blobStoreTransferService, + blobStoreRepository, clusterName, threadPool); + } else if (object instanceof Settings) { + return new RemotePersistentSettingsMetadata((Settings) object, metadataVersion, clusterUUID, blobStoreTransferService, + blobStoreRepository, clusterName, threadPool); + } else if (object instanceof TemplatesMetadata) { + return new RemoteTemplatesMetadata((TemplatesMetadata) object, metadataVersion, clusterUUID, blobStoreTransferService, + blobStoreRepository, clusterName, threadPool); + } else if (object instanceof Custom) { + return new RemoteCustomMetadata((Custom) object, customType ,metadataVersion, clusterUUID, blobStoreTransferService, + blobStoreRepository, clusterName, threadPool); + } + throw new RemoteStateTransferException("Remote object cannot be created for " + object.getClass()); + } + + private BlobContainer globalMetadataContainer(String clusterName, String clusterUUID) { + // 123456789012_test-cluster/cluster-state/dsgYj10Nkso7/global-metadata/ + return blobStoreRepository.blobStore() + .blobContainer(getCusterMetadataBasePath(blobStoreRepository, clusterName, clusterUUID).add(GLOBAL_METADATA_PATH_TOKEN)); + } + + boolean isGlobalMetadataEqual(ClusterMetadataManifest first, ClusterMetadataManifest second, String clusterName) { + Metadata secondGlobalMetadata = getGlobalMetadata(clusterName, second.getClusterUUID(), second); + Metadata firstGlobalMetadata = getGlobalMetadata(clusterName, first.getClusterUUID(), first); + return Metadata.isGlobalResourcesMetadataEquals(firstGlobalMetadata, secondGlobalMetadata); + } + + private void setGlobalMetadataUploadTimeout(TimeValue newGlobalMetadataUploadTimeout) { + this.globalMetadataUploadTimeout = newGlobalMetadataUploadTimeout; + } + + public TimeValue getGlobalMetadataUploadTimeout() { + return this.globalMetadataUploadTimeout; + } +} diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteIndexMetadata.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteIndexMetadata.java new file mode 100644 index 0000000000000..3249927732988 --- /dev/null +++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteIndexMetadata.java @@ -0,0 +1,111 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.gateway.remote; + + +import static org.opensearch.gateway.remote.RemoteClusterStateUtils.METADATA_NAME_PLAIN_FORMAT; + +import java.io.IOException; +import java.io.InputStream; +import java.util.List; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.common.io.Streams; +import org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedIndexMetadata; +import org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedMetadata; +import org.opensearch.index.remote.RemoteStoreUtils; +import org.opensearch.index.translog.transfer.BlobStoreTransferService; +import org.opensearch.repositories.blobstore.BlobStoreRepository; +import org.opensearch.repositories.blobstore.ChecksumBlobStoreFormat; +import org.opensearch.threadpool.ThreadPool; + +public class RemoteIndexMetadata extends AbstractRemoteBlobStoreObject { + + public static final int INDEX_METADATA_CURRENT_CODEC_VERSION = 1; + + public static final ChecksumBlobStoreFormat INDEX_METADATA_FORMAT = new ChecksumBlobStoreFormat<>( + "index-metadata", + METADATA_NAME_PLAIN_FORMAT, + IndexMetadata::fromXContent + ); + public static final String INDEX_PATH_TOKEN = "index"; + + private IndexMetadata indexMetadata; + private String blobName; + private final String clusterUUID; + + public RemoteIndexMetadata(IndexMetadata indexMetadata, String clusterUUID, + BlobStoreTransferService blobStoreTransferService, BlobStoreRepository blobStoreRepository, String clusterName, + ThreadPool threadPool) { + super(blobStoreTransferService, blobStoreRepository, clusterName, threadPool); + this.indexMetadata = indexMetadata; + this.clusterUUID = clusterUUID; + } + + public RemoteIndexMetadata(String blobName, String clusterUUID, + BlobStoreTransferService blobStoreTransferService, BlobStoreRepository blobStoreRepository, String clusterName, + ThreadPool threadPool) { + super(blobStoreTransferService, blobStoreRepository, clusterName, threadPool); + this.blobName = blobName; + this.clusterUUID = clusterUUID; + } + + @Override + public IndexMetadata get() { + return indexMetadata; + } + + @Override + public String clusterUUID() { + return clusterUUID; + } + + @Override + public String getFullBlobName() { + return blobName; + } + + @Override + public BlobPathParameters getBlobPathParameters() { + return new BlobPathParameters(List.of(INDEX_PATH_TOKEN), "metadata"); + } + + @Override + public String generateBlobFileName() { + String blobFileName = String.join( + RemoteClusterStateUtils.DELIMITER, + getBlobPathParameters().getFilePrefix(), + RemoteStoreUtils.invertLong(indexMetadata.getVersion()), + RemoteStoreUtils.invertLong(System.currentTimeMillis()), + String.valueOf(INDEX_METADATA_CURRENT_CODEC_VERSION) // Keep the codec version at last place only, during reads we read last + // place to determine codec version. + ); + // setting the full blob path with name for future access + this.blobName = getBlobPathForUpload().buildAsString() + blobFileName; + return blobFileName; + } + + @Override + public UploadedMetadata getUploadedMetadata() { + assert blobName != null; + return new UploadedIndexMetadata(indexMetadata.getIndexName(), indexMetadata.getIndexUUID(), blobName); + } + + @Override + public InputStream serialize() throws IOException { + return INDEX_METADATA_FORMAT.serialize(indexMetadata, generateBlobFileName(), getCompressor(), RemoteClusterStateUtils.FORMAT_PARAMS) + .streamInput(); + } + + @Override + public IndexMetadata deserialize(InputStream inputStream) throws IOException { + // Blob name parameter is redundant + return INDEX_METADATA_FORMAT.deserialize(blobName, getBlobStoreRepository().getNamedXContentRegistry(), Streams.readFully(inputStream)); + } + +} diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteIndexMetadataManager.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteIndexMetadataManager.java new file mode 100644 index 0000000000000..fc90ba82999ae --- /dev/null +++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteIndexMetadataManager.java @@ -0,0 +1,115 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.gateway.remote; + +import static org.opensearch.gateway.remote.RemoteClusterStateUtils.RemoteStateTransferException; +import static org.opensearch.gateway.remote.RemoteIndexMetadata.INDEX_PATH_TOKEN; + +import java.io.IOException; +import java.util.Locale; +import org.opensearch.action.LatchedActionListener; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.common.CheckedRunnable; +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Setting; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.core.action.ActionListener; +import org.opensearch.index.translog.transfer.BlobStoreTransferService; +import org.opensearch.repositories.blobstore.BlobStoreRepository; +import org.opensearch.threadpool.ThreadPool; + +public class RemoteIndexMetadataManager { + + public static final TimeValue INDEX_METADATA_UPLOAD_TIMEOUT_DEFAULT = TimeValue.timeValueMillis(20000); + + public static final Setting INDEX_METADATA_UPLOAD_TIMEOUT_SETTING = Setting.timeSetting( + "cluster.remote_store.state.index_metadata.upload_timeout", + INDEX_METADATA_UPLOAD_TIMEOUT_DEFAULT, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + private final BlobStoreRepository blobStoreRepository; + private final ThreadPool threadPool; + + private final BlobStoreTransferService blobStoreTransferService; + + private volatile TimeValue indexMetadataUploadTimeout; + private final String clusterName; + + public RemoteIndexMetadataManager(BlobStoreRepository blobStoreRepository, ClusterSettings clusterSettings, ThreadPool threadPool, String clusterName, + BlobStoreTransferService blobStoreTransferService) { + this.blobStoreRepository = blobStoreRepository; + this.indexMetadataUploadTimeout = clusterSettings.get(INDEX_METADATA_UPLOAD_TIMEOUT_SETTING); + this.threadPool = threadPool; + clusterSettings.addSettingsUpdateConsumer(INDEX_METADATA_UPLOAD_TIMEOUT_SETTING, this::setIndexMetadataUploadTimeout); + this.blobStoreTransferService = blobStoreTransferService; + this.clusterName = clusterName; + } + + /** + * Allows async Upload of IndexMetadata to remote + * + * @param indexMetadata {@link IndexMetadata} to upload + * @param latchedActionListener listener to respond back on after upload finishes + */ + CheckedRunnable getIndexMetadataAsyncAction(IndexMetadata indexMetadata, String clusterUUID, + LatchedActionListener latchedActionListener) { + RemoteIndexMetadata remoteIndexMetadata = new RemoteIndexMetadata(indexMetadata, clusterUUID, blobStoreTransferService, blobStoreRepository, clusterName, threadPool); + ActionListener completionListener = ActionListener.wrap( + resp -> latchedActionListener.onResponse( + remoteIndexMetadata.getUploadedMetadata() + ), + ex -> latchedActionListener.onFailure(new RemoteStateTransferException(indexMetadata.getIndex().toString(), ex)) + ); + return remoteIndexMetadata.writeAsync(completionListener); + } + + CheckedRunnable getAsyncIndexMetadataReadAction( + String clusterUUID, + String uploadedFilename, + LatchedActionListener latchedActionListener + ) { + RemoteIndexMetadata remoteIndexMetadata = new RemoteIndexMetadata(uploadedFilename, clusterUUID, blobStoreTransferService, blobStoreRepository, clusterName, threadPool); + ActionListener actionListener = ActionListener.wrap( + response -> latchedActionListener.onResponse(new RemoteReadResult(response, INDEX_PATH_TOKEN, response.getIndexName())), + latchedActionListener::onFailure); + return () -> remoteIndexMetadata.readAsync(actionListener); + } + + /** + * Fetch index metadata from remote cluster state + * + * @param uploadedIndexMetadata {@link ClusterMetadataManifest.UploadedIndexMetadata} contains details about remote location of index metadata + * @return {@link IndexMetadata} + */ + IndexMetadata getIndexMetadata( + ClusterMetadataManifest.UploadedIndexMetadata uploadedIndexMetadata, String clusterUUID, int manifestCodecVersion + ) { + RemoteIndexMetadata remoteIndexMetadata = new RemoteIndexMetadata(RemoteClusterStateUtils.getFormattedFileName( + uploadedIndexMetadata.getUploadedFilename(), manifestCodecVersion), clusterUUID, blobStoreTransferService, blobStoreRepository, clusterName, threadPool); + try { + return remoteIndexMetadata.read(); + } catch (IOException e) { + throw new IllegalStateException( + String.format(Locale.ROOT, "Error while downloading IndexMetadata - %s", uploadedIndexMetadata.getUploadedFilename()), + e + ); + } + } + + public TimeValue getIndexMetadataUploadTimeout() { + return this.indexMetadataUploadTimeout; + } + + private void setIndexMetadataUploadTimeout(TimeValue newIndexMetadataUploadTimeout) { + this.indexMetadataUploadTimeout = newIndexMetadataUploadTimeout; + } + +} diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteManifestManager.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteManifestManager.java new file mode 100644 index 0000000000000..f597e43149a07 --- /dev/null +++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteManifestManager.java @@ -0,0 +1,389 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.gateway.remote; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.Version; +import org.opensearch.action.LatchedActionListener; +import org.opensearch.cluster.ClusterState; +import org.opensearch.common.blobstore.BlobContainer; +import org.opensearch.common.blobstore.BlobMetadata; +import org.opensearch.common.blobstore.BlobPath; +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Setting; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.core.action.ActionListener; +import org.opensearch.index.remote.RemoteStoreUtils; +import org.opensearch.repositories.blobstore.BlobStoreRepository; +import org.opensearch.repositories.blobstore.ChecksumBlobStoreFormat; + +import java.io.IOException; +import java.util.HashMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicReference; + +import static org.opensearch.gateway.remote.RemoteClusterStateUtils.DELIMITER; +import static org.opensearch.gateway.remote.RemoteClusterStateUtils.FORMAT_PARAMS; +import static org.opensearch.gateway.remote.RemoteClusterStateUtils.RemoteStateTransferException; +import static org.opensearch.gateway.remote.RemoteClusterStateUtils.getCusterMetadataBasePath; + +public class RemoteManifestManager { + public static final String MANIFEST_PATH_TOKEN = "manifest"; + public static final String MANIFEST_FILE_PREFIX = "manifest"; + public static final String METADATA_MANIFEST_NAME_FORMAT = "%s"; + public static final int MANIFEST_CURRENT_CODEC_VERSION = ClusterMetadataManifest.CODEC_V3; + public static final int SPLITED_MANIFEST_FILE_LENGTH = 6; // file name manifest__term__version__C/P__timestamp__codecversion + + public static final TimeValue METADATA_MANIFEST_UPLOAD_TIMEOUT_DEFAULT = TimeValue.timeValueMillis(20000); + + public static final Setting METADATA_MANIFEST_UPLOAD_TIMEOUT_SETTING = Setting.timeSetting( + "cluster.remote_store.state.metadata_manifest.upload_timeout", + METADATA_MANIFEST_UPLOAD_TIMEOUT_DEFAULT, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * Manifest format compatible with older codec v0, where codec version was missing. + */ + public static final ChecksumBlobStoreFormat CLUSTER_METADATA_MANIFEST_FORMAT_V0 = + new ChecksumBlobStoreFormat<>("cluster-metadata-manifest", METADATA_MANIFEST_NAME_FORMAT, ClusterMetadataManifest::fromXContentV0); + /** + * Manifest format compatible with older codec v1, where global metadata was missing. + */ + public static final ChecksumBlobStoreFormat CLUSTER_METADATA_MANIFEST_FORMAT_V1 = + new ChecksumBlobStoreFormat<>("cluster-metadata-manifest", METADATA_MANIFEST_NAME_FORMAT, ClusterMetadataManifest::fromXContentV1); + + /** + * Manifest format compatible with codec v2, where we introduced codec versions/global metadata. + */ + public static final ChecksumBlobStoreFormat CLUSTER_METADATA_MANIFEST_FORMAT = new ChecksumBlobStoreFormat<>( + "cluster-metadata-manifest", + METADATA_MANIFEST_NAME_FORMAT, + ClusterMetadataManifest::fromXContent + ); + + private final BlobStoreRepository blobStoreRepository; + private volatile TimeValue metadataManifestUploadTimeout; + private final String nodeId; + private static final Logger logger = LogManager.getLogger(RemoteManifestManager.class); + + RemoteManifestManager(BlobStoreRepository blobStoreRepository, ClusterSettings clusterSettings, String nodeId) { + this.blobStoreRepository = blobStoreRepository; + this.metadataManifestUploadTimeout = clusterSettings.get(METADATA_MANIFEST_UPLOAD_TIMEOUT_SETTING); + this.nodeId = nodeId; + clusterSettings.addSettingsUpdateConsumer(METADATA_MANIFEST_UPLOAD_TIMEOUT_SETTING, this::setMetadataManifestUploadTimeout); + } + + private ClusterMetadataManifest uploadV1Manifest( + ClusterState clusterState, + List uploadedIndexMetadata, + String previousClusterUUID, + String globalMetadataFileName, + boolean committed + ) throws IOException { + synchronized (this) { + final String manifestFileName = getManifestFileName( + clusterState.term(), + clusterState.version(), + committed, + ClusterMetadataManifest.CODEC_V1 + ); + ClusterMetadataManifest manifest = ClusterMetadataManifest.builder() + .clusterTerm(clusterState.term()) + .stateVersion(clusterState.getVersion()) + .clusterUUID(clusterState.metadata().clusterUUID()) + .stateUUID(clusterState.stateUUID()) + .opensearchVersion(Version.CURRENT) + .nodeId(nodeId) + .committed(committed) + .codecVersion(ClusterMetadataManifest.CODEC_V1) + .globalMetadataFileName(globalMetadataFileName) + .indices(uploadedIndexMetadata) + .previousClusterUUID(previousClusterUUID) + .clusterUUIDCommitted(clusterState.metadata().clusterUUIDCommitted()) + .build(); + writeMetadataManifest(clusterState.getClusterName().value(), clusterState.metadata().clusterUUID(), manifest, manifestFileName); + return manifest; + } + } + + ClusterMetadataManifest uploadManifest( + ClusterState clusterState, + List uploadedIndexMetadata, + String previousClusterUUID, + ClusterMetadataManifest.UploadedMetadataAttribute uploadedCoordinationMetadata, + ClusterMetadataManifest.UploadedMetadataAttribute uploadedSettingsMetadata, + ClusterMetadataManifest.UploadedMetadataAttribute uploadedTemplatesMetadata, + Map uploadedCustomMetadataMap, + ClusterMetadataManifest.UploadedMetadataAttribute uploadedDiscoveryNodesMetadata, + ClusterMetadataManifest.UploadedMetadataAttribute uploadedClusterBlocksMetadata, + ClusterStateDiffManifest clusterDiffManifest, + List routingIndexMetadata, boolean committed + ) throws IOException { + synchronized (this) { + final String manifestFileName = getManifestFileName( + clusterState.term(), + clusterState.version(), + committed, + MANIFEST_CURRENT_CODEC_VERSION + ); + ClusterMetadataManifest.Builder manifestBuilder = ClusterMetadataManifest.builder(); + manifestBuilder.clusterTerm(clusterState.term()) + .stateVersion(clusterState.getVersion()) + .clusterUUID(clusterState.metadata().clusterUUID()) + .stateUUID(clusterState.stateUUID()) + .opensearchVersion(Version.CURRENT) + .nodeId(nodeId) + .committed(committed) + .codecVersion(MANIFEST_CURRENT_CODEC_VERSION) + .indices(uploadedIndexMetadata) + .previousClusterUUID(previousClusterUUID) + .clusterUUIDCommitted(clusterState.metadata().clusterUUIDCommitted()) + .coordinationMetadata(uploadedCoordinationMetadata) + .settingMetadata(uploadedSettingsMetadata) + .templatesMetadata(uploadedTemplatesMetadata) + .customMetadataMap(uploadedCustomMetadataMap) + .discoveryNodesMetadata(uploadedDiscoveryNodesMetadata) + .clusterBlocksMetadata(uploadedClusterBlocksMetadata) + .diffManifest(clusterDiffManifest) + .routingTableVersion(clusterState.getRoutingTable().version()) + .indicesRouting(routingIndexMetadata) + .metadataVersion(clusterState.metadata().version()); + final ClusterMetadataManifest manifest = manifestBuilder.build(); + writeMetadataManifest(clusterState.getClusterName().value(), clusterState.metadata().clusterUUID(), manifest, manifestFileName); + return manifest; + } + } + + private void writeMetadataManifest(String clusterName, String clusterUUID, ClusterMetadataManifest uploadManifest, String fileName) + throws IOException { + AtomicReference result = new AtomicReference(); + AtomicReference exceptionReference = new AtomicReference(); + + final BlobContainer metadataManifestContainer = manifestContainer(clusterName, clusterUUID); + + // latch to wait until upload is not finished + CountDownLatch latch = new CountDownLatch(1); + + LatchedActionListener completionListener = new LatchedActionListener<>(ActionListener.wrap(resp -> { + logger.trace(String.format(Locale.ROOT, "Manifest file uploaded successfully.")); + }, ex -> { exceptionReference.set(ex); }), latch); + + getClusterMetadataManifestBlobStoreFormat(fileName).writeAsyncWithUrgentPriority( + uploadManifest, + metadataManifestContainer, + fileName, + blobStoreRepository.getCompressor(), + completionListener, + FORMAT_PARAMS + ); + + try { + if (latch.await(getMetadataManifestUploadTimeout().millis(), TimeUnit.MILLISECONDS) == false) { + RemoteStateTransferException ex = new RemoteStateTransferException( + String.format(Locale.ROOT, "Timed out waiting for transfer of manifest file to complete") + ); + throw ex; + } + } catch (InterruptedException ex) { + RemoteStateTransferException exception = new RemoteStateTransferException( + String.format(Locale.ROOT, "Timed out waiting for transfer of manifest file to complete - %s"), + ex + ); + Thread.currentThread().interrupt(); + throw exception; + } + if (exceptionReference.get() != null) { + throw new RemoteStateTransferException(exceptionReference.get().getMessage(), exceptionReference.get()); + } + logger.debug( + "Metadata manifest file [{}] written during [{}] phase. ", + fileName, + uploadManifest.isCommitted() ? "commit" : "publish" + ); + } + + /** + * Fetch latest ClusterMetadataManifest from remote state store + * + * @param clusterUUID uuid of cluster state to refer to in remote + * @param clusterName name of the cluster + * @return ClusterMetadataManifest + */ + public Optional getLatestClusterMetadataManifest(String clusterName, String clusterUUID) { + Optional latestManifestFileName = getLatestManifestFileName(clusterName, clusterUUID); + return latestManifestFileName.map(s -> fetchRemoteClusterMetadataManifest(clusterName, clusterUUID, s)); + } + + public Optional getClusterMetadataManifestByTermVersion(String clusterName, String clusterUUID, long term, long version) { + Optional manifestFileName = getManifestFileNameByTermVersion(clusterName, clusterUUID, term, version); + return manifestFileName.map(s -> fetchRemoteClusterMetadataManifest(clusterName, clusterUUID, s)); + } + + /** + * Fetch ClusterMetadataManifest from remote state store + * + * @param clusterUUID uuid of cluster state to refer to in remote + * @param clusterName name of the cluster + * @return ClusterMetadataManifest + */ + ClusterMetadataManifest fetchRemoteClusterMetadataManifest(String clusterName, String clusterUUID, String filename) + throws IllegalStateException { + try { + return getClusterMetadataManifestBlobStoreFormat(filename).read( + manifestContainer(clusterName, clusterUUID), + filename, + blobStoreRepository.getNamedXContentRegistry() + ); + } catch (IOException e) { + throw new IllegalStateException(String.format(Locale.ROOT, "Error while downloading cluster metadata - %s", filename), e); + } + } + + Map getLatestManifestForAllClusterUUIDs(String clusterName, Set clusterUUIDs) { + Map manifestsByClusterUUID = new HashMap<>(); + for (String clusterUUID : clusterUUIDs) { + try { + Optional manifest = getLatestClusterMetadataManifest(clusterName, clusterUUID); + manifest.ifPresent(clusterMetadataManifest -> manifestsByClusterUUID.put(clusterUUID, clusterMetadataManifest)); + } catch (Exception e) { + throw new IllegalStateException( + String.format(Locale.ROOT, "Exception in fetching manifest for clusterUUID: %s", clusterUUID), + e + ); + } + } + return manifestsByClusterUUID; + } + + private ChecksumBlobStoreFormat getClusterMetadataManifestBlobStoreFormat(String fileName) { + long codecVersion = getManifestCodecVersion(fileName); + if (codecVersion == MANIFEST_CURRENT_CODEC_VERSION) { + return CLUSTER_METADATA_MANIFEST_FORMAT; + } else if (codecVersion == ClusterMetadataManifest.CODEC_V1) { + return CLUSTER_METADATA_MANIFEST_FORMAT_V1; + } else if (codecVersion == ClusterMetadataManifest.CODEC_V0) { + return CLUSTER_METADATA_MANIFEST_FORMAT_V0; + } + + throw new IllegalArgumentException("Cluster metadata manifest file is corrupted, don't have valid codec version"); + } + + private int getManifestCodecVersion(String fileName) { + String[] splitName = fileName.split(DELIMITER); + if (splitName.length == SPLITED_MANIFEST_FILE_LENGTH) { + return Integer.parseInt(splitName[splitName.length - 1]); // Last value would be codec version. + } else if (splitName.length < SPLITED_MANIFEST_FILE_LENGTH) { // Where codec is not part of file name, i.e. default codec version 0 + // is used. + return ClusterMetadataManifest.CODEC_V0; + } else { + throw new IllegalArgumentException("Manifest file name is corrupted"); + } + } + + private BlobContainer manifestContainer(String clusterName, String clusterUUID) { + // 123456789012_test-cluster/cluster-state/dsgYj10Nkso7/manifest + return blobStoreRepository.blobStore().blobContainer(getManifestFolderPath(clusterName, clusterUUID)); + } + + BlobPath getManifestFolderPath(String clusterName, String clusterUUID) { + return getCusterMetadataBasePath(blobStoreRepository, clusterName, clusterUUID).add(MANIFEST_PATH_TOKEN); + } + + public TimeValue getMetadataManifestUploadTimeout() { + return this.metadataManifestUploadTimeout; + } + + private void setMetadataManifestUploadTimeout(TimeValue newMetadataManifestUploadTimeout) { + this.metadataManifestUploadTimeout = newMetadataManifestUploadTimeout; + } + + /** + * Fetch ClusterMetadataManifest files from remote state store in order + * + * @param clusterUUID uuid of cluster state to refer to in remote + * @param clusterName name of the cluster + * @param limit max no of files to fetch + * @return all manifest file names + */ + private List getManifestFileNames(String clusterName, String clusterUUID, String filePrefix, int limit) throws IllegalStateException { + try { + + /* + {@link BlobContainer#listBlobsByPrefixInSortedOrder} will list the latest manifest file first + as the manifest file name generated via {@link RemoteClusterStateService#getManifestFileName} ensures + when sorted in LEXICOGRAPHIC order the latest uploaded manifest file comes on top. + */ + return manifestContainer(clusterName, clusterUUID).listBlobsByPrefixInSortedOrder( + filePrefix, + limit, + BlobContainer.BlobNameSortOrder.LEXICOGRAPHIC + ); + } catch (IOException e) { + throw new IllegalStateException("Error while fetching latest manifest file for remote cluster state", e); + } + } + + static String getManifestFileName(long term, long version, boolean committed, int codecVersion) { + // 123456789012_test-cluster/cluster-state/dsgYj10Nkso7/manifest/manifest______C/P____ + return String.join( + DELIMITER, + MANIFEST_PATH_TOKEN, + RemoteStoreUtils.invertLong(term), + RemoteStoreUtils.invertLong(version), + (committed ? "C" : "P"), // C for committed and P for published + RemoteStoreUtils.invertLong(System.currentTimeMillis()), + String.valueOf(codecVersion) // Keep the codec version at last place only, during read we reads last place to + // determine codec version. + ); + } + + static String getManifestFilePrefixForTermVersion(long term, long version) { + return String.join( + DELIMITER, + MANIFEST_FILE_PREFIX, + RemoteStoreUtils.invertLong(term), + RemoteStoreUtils.invertLong(version) + ) + DELIMITER; + } + + /** + * Fetch latest ClusterMetadataManifest file from remote state store + * + * @param clusterUUID uuid of cluster state to refer to in remote + * @param clusterName name of the cluster + * @return latest ClusterMetadataManifest filename + */ + private Optional getLatestManifestFileName(String clusterName, String clusterUUID) throws IllegalStateException { + List manifestFilesMetadata = getManifestFileNames(clusterName, clusterUUID, MANIFEST_FILE_PREFIX + DELIMITER, 1); + if (manifestFilesMetadata != null && !manifestFilesMetadata.isEmpty()) { + return Optional.of(manifestFilesMetadata.get(0).name()); + } + logger.info("No manifest file present in remote store for cluster name: {}, cluster UUID: {}", clusterName, clusterUUID); + return Optional.empty(); + } + + private Optional getManifestFileNameByTermVersion(String clusterName, String clusterUUID, long term, long version) throws IllegalStateException { + final String filePrefix = getManifestFilePrefixForTermVersion(term, version); + List manifestFilesMetadata = getManifestFileNames(clusterName, clusterUUID, filePrefix, 1); + if (manifestFilesMetadata != null && !manifestFilesMetadata.isEmpty()) { + return Optional.of(manifestFilesMetadata.get(0).name()); + } + logger.info("No manifest file present in remote store for cluster name: {}, cluster UUID: {}, term: {}, version: {}", clusterName, clusterUUID, term, version); + return Optional.empty(); + } +} diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteObject.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteObject.java new file mode 100644 index 0000000000000..8802ab0e96c5a --- /dev/null +++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteObject.java @@ -0,0 +1,27 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.gateway.remote; + +import java.io.IOException; +import java.io.InputStream; +import org.opensearch.common.CheckedRunnable; +import org.opensearch.core.action.ActionListener; +import org.opensearch.core.common.bytes.BytesReference; + +public interface RemoteObject { + public T get(); + public String clusterUUID(); + public InputStream serialize() throws IOException; + public T deserialize(InputStream inputStream) throws IOException; + + public CheckedRunnable writeAsync(ActionListener listener); + public T read() throws IOException; + public void readAsync(ActionListener listener); + +} diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemotePersistentSettingsMetadata.java b/server/src/main/java/org/opensearch/gateway/remote/RemotePersistentSettingsMetadata.java new file mode 100644 index 0000000000000..f23cd6dcc1eb8 --- /dev/null +++ b/server/src/main/java/org/opensearch/gateway/remote/RemotePersistentSettingsMetadata.java @@ -0,0 +1,108 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.gateway.remote; + +import static org.opensearch.gateway.remote.RemoteClusterStateUtils.DELIMITER; +import static org.opensearch.gateway.remote.RemoteClusterStateUtils.METADATA_NAME_FORMAT; +import static org.opensearch.gateway.remote.RemoteGlobalMetadataManager.GLOBAL_METADATA_CURRENT_CODEC_VERSION; + +import java.io.IOException; +import java.io.InputStream; +import java.util.List; +import org.opensearch.common.io.Streams; +import org.opensearch.common.settings.Settings; +import org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedMetadata; +import org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedMetadataAttribute; +import org.opensearch.index.remote.RemoteStoreUtils; +import org.opensearch.index.translog.transfer.BlobStoreTransferService; +import org.opensearch.repositories.blobstore.BlobStoreRepository; +import org.opensearch.repositories.blobstore.ChecksumBlobStoreFormat; +import org.opensearch.threadpool.ThreadPool; + +public class RemotePersistentSettingsMetadata extends AbstractRemoteBlobStoreObject { + + public static final String SETTING_METADATA = "settings"; + + public static final ChecksumBlobStoreFormat SETTINGS_METADATA_FORMAT = new ChecksumBlobStoreFormat<>( + "settings", + METADATA_NAME_FORMAT, + Settings::fromXContent + ); + + private Settings persistentSettings; + private long metadataVersion; + private String blobName; + private final String clusterUUID; + + public RemotePersistentSettingsMetadata(Settings settings, long metadataVersion, String clusterUUID, BlobStoreTransferService blobStoreTransferService, BlobStoreRepository blobStoreRepository, String clusterName, + ThreadPool threadPool) { + super(blobStoreTransferService, blobStoreRepository, clusterName, threadPool); + this.persistentSettings = settings; + this.metadataVersion = metadataVersion; + this.clusterUUID = clusterUUID; + } + + public RemotePersistentSettingsMetadata(String blobName, String clusterUUID, BlobStoreTransferService blobStoreTransferService, BlobStoreRepository blobStoreRepository, String clusterName, + ThreadPool threadPool) { + super(blobStoreTransferService, blobStoreRepository, clusterName, threadPool); + this.blobName = blobName; + this.clusterUUID = clusterUUID; + } + + @Override + public BlobPathParameters getBlobPathParameters() { + return new BlobPathParameters(List.of("global-metadata"), SETTING_METADATA); + } + + @Override + public String getFullBlobName() { + return blobName; + } + + @Override + public String generateBlobFileName() { + String blobFileName = String.join( + DELIMITER, + getBlobPathParameters().getFilePrefix(), + RemoteStoreUtils.invertLong(metadataVersion), + RemoteStoreUtils.invertLong(System.currentTimeMillis()), + String.valueOf(GLOBAL_METADATA_CURRENT_CODEC_VERSION) + ); + // setting the full blob path with name for future access + this.blobName = getBlobPathForUpload().buildAsString() + blobFileName; + return blobFileName; + } + + @Override + public Settings get() { + return persistentSettings; + } + + @Override + public String clusterUUID() { + return clusterUUID; + } + + + @Override + public InputStream serialize() throws IOException { + return SETTINGS_METADATA_FORMAT.serialize(persistentSettings, generateBlobFileName(), getCompressor(), RemoteClusterStateUtils.FORMAT_PARAMS).streamInput(); + } + + @Override + public Settings deserialize(InputStream inputStream) throws IOException { + return SETTINGS_METADATA_FORMAT.deserialize(blobName, getBlobStoreRepository().getNamedXContentRegistry(), Streams.readFully(inputStream)); + } + + @Override + public UploadedMetadata getUploadedMetadata() { + assert blobName != null; + return new UploadedMetadataAttribute(SETTING_METADATA, blobName); + } +} diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteReadResult.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteReadResult.java new file mode 100644 index 0000000000000..dea8a6221d05a --- /dev/null +++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteReadResult.java @@ -0,0 +1,35 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.gateway.remote; + +import org.opensearch.core.xcontent.ToXContent; + +public class RemoteReadResult { + ToXContent obj; + String component; + String componentName; + + public RemoteReadResult(ToXContent obj, String component, String componentName) { + this.obj = obj; + this.component = component; + this.componentName = componentName; + } + + public ToXContent getObj() { + return obj; + } + + public String getComponent() { + return component; + } + + public String getComponentName() { + return componentName; + } +} diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteTemplatesMetadata.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteTemplatesMetadata.java new file mode 100644 index 0000000000000..9c302993536a9 --- /dev/null +++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteTemplatesMetadata.java @@ -0,0 +1,106 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.gateway.remote; + +import static org.opensearch.gateway.remote.RemoteClusterStateUtils.DELIMITER; +import static org.opensearch.gateway.remote.RemoteClusterStateUtils.METADATA_NAME_FORMAT; +import static org.opensearch.gateway.remote.RemoteGlobalMetadataManager.GLOBAL_METADATA_CURRENT_CODEC_VERSION; + +import java.io.IOException; +import java.io.InputStream; +import java.util.List; +import org.opensearch.cluster.metadata.TemplatesMetadata; +import org.opensearch.common.io.Streams; +import org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedMetadata; +import org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedMetadataAttribute; +import org.opensearch.index.remote.RemoteStoreUtils; +import org.opensearch.index.translog.transfer.BlobStoreTransferService; +import org.opensearch.repositories.blobstore.BlobStoreRepository; +import org.opensearch.repositories.blobstore.ChecksumBlobStoreFormat; +import org.opensearch.threadpool.ThreadPool; + +public class RemoteTemplatesMetadata extends AbstractRemoteBlobStoreObject { + + public static final String TEMPLATES_METADATA = "templates"; + + public static final ChecksumBlobStoreFormat TEMPLATES_METADATA_FORMAT = new ChecksumBlobStoreFormat<>( + "templates", + METADATA_NAME_FORMAT, + TemplatesMetadata::fromXContent + ); + private TemplatesMetadata templatesMetadata; + private long metadataVersion; + private String blobName; + private final String clusterUUID; + public RemoteTemplatesMetadata(TemplatesMetadata templatesMetadata, long metadataVersion, String clusterUUID, BlobStoreTransferService blobStoreTransferService, BlobStoreRepository blobStoreRepository, String clusterName, + ThreadPool threadPool) { + super(blobStoreTransferService, blobStoreRepository, clusterName, threadPool); + this.templatesMetadata = templatesMetadata; + this.metadataVersion = metadataVersion; + this.clusterUUID = clusterUUID; + } + + public RemoteTemplatesMetadata(String blobName, String clusterUUID, BlobStoreTransferService blobStoreTransferService, BlobStoreRepository blobStoreRepository, String clusterName, + ThreadPool threadPool) { + super(blobStoreTransferService, blobStoreRepository, clusterName, threadPool); + this.blobName = blobName; + this.clusterUUID = clusterUUID; + } + + @Override + public BlobPathParameters getBlobPathParameters() { + return new BlobPathParameters(List.of("global-metadata"), TEMPLATES_METADATA); + } + + @Override + public String getFullBlobName() { + return blobName; + } + + @Override + public String generateBlobFileName() { + // 123456789012_test-cluster/cluster-state/dsgYj10Nkso7/global-metadata/______ + String blobFileName = String.join( + DELIMITER, + getBlobPathParameters().getFilePrefix(), + RemoteStoreUtils.invertLong(metadataVersion), + RemoteStoreUtils.invertLong(System.currentTimeMillis()), + String.valueOf(GLOBAL_METADATA_CURRENT_CODEC_VERSION) + ); + // setting the full blob path with name for future access + this.blobName = getBlobPathForUpload().buildAsString() + blobFileName; + return blobFileName; + } + + @Override + public TemplatesMetadata get() { + return templatesMetadata; + } + + @Override + public String clusterUUID() { + return clusterUUID; + } + + @Override + public InputStream serialize() throws IOException { + return TEMPLATES_METADATA_FORMAT.serialize(templatesMetadata, generateBlobFileName(), getCompressor(), RemoteClusterStateUtils.FORMAT_PARAMS).streamInput(); + } + + @Override + public TemplatesMetadata deserialize(InputStream inputStream) throws IOException { + return TEMPLATES_METADATA_FORMAT.deserialize(blobName, getBlobStoreRepository().getNamedXContentRegistry(), Streams.readFully(inputStream)); + } + + @Override + public UploadedMetadata getUploadedMetadata() { + assert blobName != null; + return new UploadedMetadataAttribute(TEMPLATES_METADATA, blobName); + } +} diff --git a/server/src/main/java/org/opensearch/gateway/remote/routingtable/IndexRoutingTableHeader.java b/server/src/main/java/org/opensearch/gateway/remote/routingtable/IndexRoutingTableHeader.java new file mode 100644 index 0000000000000..bc99fea9b5c09 --- /dev/null +++ b/server/src/main/java/org/opensearch/gateway/remote/routingtable/IndexRoutingTableHeader.java @@ -0,0 +1,79 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.gateway.remote.routingtable; + +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.IndexFormatTooNewException; +import org.apache.lucene.index.IndexFormatTooOldException; +import org.apache.lucene.store.InputStreamDataInput; +import org.apache.lucene.store.OutputStreamDataOutput; +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.core.common.io.stream.StreamOutput; + +import java.io.EOFException; +import java.io.IOException; + +/** + * The stored header information for the individual index routing table + */ +public class IndexRoutingTableHeader { + + public static final String INDEX_ROUTING_HEADER_CODEC = "index_routing_header_codec"; + public static final int INITIAL_VERSION = 1; + public static final int CURRENT_VERSION = INITIAL_VERSION; + private final String indexName; + + public IndexRoutingTableHeader(String indexName) { + this.indexName = indexName; + } + + /** + * Reads the contents on the byte array into the corresponding {@link IndexRoutingTableHeader} + * + * @param in + * @return IndexRoutingTableHeader + * @throws IOException + */ + public static IndexRoutingTableHeader read(StreamInput in) throws IOException { + try { + readHeaderVersion(in); + final String name = in.readString(); + return new IndexRoutingTableHeader(name); + } catch (EOFException e) { + throw new IOException("index routing header truncated", e); + } + } + + static int readHeaderVersion(final StreamInput in) throws IOException { + final int version; + try { + version = CodecUtil.checkHeader(new InputStreamDataInput(in), INDEX_ROUTING_HEADER_CODEC, INITIAL_VERSION, CURRENT_VERSION); + } catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException e) { + throw new IOException("index routing table header corrupted", e); + } + return version; + } + + /** + * Returns the bytes reference for the {@link IndexRoutingTableHeader} + * + * @throws IOException + */ + public void write(StreamOutput out) throws IOException { + CodecUtil.writeHeader(new OutputStreamDataOutput(out), INDEX_ROUTING_HEADER_CODEC, CURRENT_VERSION); + out.writeString(indexName); + out.flush(); + } + + public String getIndexName() { + return indexName; + } + +} diff --git a/server/src/main/java/org/opensearch/gateway/remote/routingtable/IndexRoutingTableInputStream.java b/server/src/main/java/org/opensearch/gateway/remote/routingtable/IndexRoutingTableInputStream.java new file mode 100644 index 0000000000000..d4e2594bf153c --- /dev/null +++ b/server/src/main/java/org/opensearch/gateway/remote/routingtable/IndexRoutingTableInputStream.java @@ -0,0 +1,173 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.gateway.remote.routingtable; + +import org.opensearch.cluster.routing.IndexRoutingTable; +import org.opensearch.cluster.routing.IndexShardRoutingTable; +import org.opensearch.common.io.stream.BufferedChecksumStreamOutput; +import org.opensearch.common.io.stream.BytesStreamOutput; +import org.opensearch.core.common.bytes.BytesReference; + +import java.io.IOException; +import java.io.InputStream; +import java.util.Iterator; + +public class IndexRoutingTableInputStream extends InputStream { + + /** + * The buffer where data is stored. + */ + protected byte[] buf; + + /** + * The number of valid bytes in the buffer. + */ + protected int count; + + /** + * The buffer left over from the last fill + */ + protected byte[] leftOverBuf; + + /** + * The mark position + */ + protected int markPos = -1; + + /** + * The read limit + */ + protected int markLimit; + + /** + * The position + */ + protected int pos; + + private static final int BUFFER_SIZE = 8192; + + private final IndexRoutingTableHeader indexRoutingTableHeader; + private final Iterator shardIter; + private final BytesStreamOutput bytesStreamOutput; + private final BufferedChecksumStreamOutput out; + + public IndexRoutingTableInputStream(IndexRoutingTable indexRoutingTable) throws IOException { + this(indexRoutingTable, BUFFER_SIZE); + } + + public IndexRoutingTableInputStream(IndexRoutingTable indexRoutingTable, int size) throws IOException { + this.buf = new byte[size]; + this.shardIter = indexRoutingTable.iterator(); + this.indexRoutingTableHeader = new IndexRoutingTableHeader(indexRoutingTable.getIndex().getName()); + this.bytesStreamOutput = new BytesStreamOutput(); + this.out = new BufferedChecksumStreamOutput(bytesStreamOutput); + + initialFill(indexRoutingTable.shards().size()); + } + + @Override + public int read() throws IOException { + if (pos >= count) { + maybeResizeAndFill(); + if (pos >= count) return -1; + } + return buf[pos++] & 0xff; + } + + private void initialFill(int shardCount) throws IOException { + indexRoutingTableHeader.write(out); + out.writeVInt(shardCount); + + System.arraycopy(bytesStreamOutput.bytes().toBytesRef().bytes, 0, buf, 0, bytesStreamOutput.bytes().length()); + count = bytesStreamOutput.bytes().length(); + bytesStreamOutput.reset(); + fill(buf); + } + + private void fill(byte[] buf) throws IOException { + if (leftOverBuf != null) { + if (leftOverBuf.length > buf.length - count) { + // leftOverBuf has more content than length of buf, so we need to copy only based on buf length and keep the remaining in + // leftOverBuf. + System.arraycopy(leftOverBuf, 0, buf, count, buf.length - count); + byte[] tempLeftOverBuffer = new byte[leftOverBuf.length - (buf.length - count)]; + System.arraycopy(leftOverBuf, buf.length - count, tempLeftOverBuffer, 0, leftOverBuf.length - (buf.length - count)); + leftOverBuf = tempLeftOverBuffer; + count = buf.length - count; + } else { + System.arraycopy(leftOverBuf, 0, buf, count, leftOverBuf.length); + count += leftOverBuf.length; + leftOverBuf = null; + } + } + + if (count < buf.length && shardIter.hasNext()) { + IndexShardRoutingTable next = shardIter.next(); + IndexShardRoutingTable.Builder.writeTo(next, out); + // Add checksum for the file after all shards are done + if (!shardIter.hasNext()) { + out.writeLong(out.getChecksum()); + } + out.flush(); + BytesReference bytesRef = bytesStreamOutput.bytes(); + bytesStreamOutput.reset(); + + if (bytesRef.length() < buf.length - count) { + System.arraycopy(bytesRef.toBytesRef().bytes, 0, buf, count, bytesRef.length()); + count += bytesRef.length(); + leftOverBuf = null; + } else { + System.arraycopy(bytesRef.toBytesRef().bytes, 0, buf, count, buf.length - count); + leftOverBuf = new byte[bytesRef.length() - (buf.length - count)]; + System.arraycopy(bytesRef.toBytesRef().bytes, buf.length - count, leftOverBuf, 0, bytesRef.length() - (buf.length - count)); + count = buf.length; + } + } + } + + private void maybeResizeAndFill() throws IOException { + byte[] buffer = buf; + if (markPos == -1) pos = 0; /* no mark: throw away the buffer */ + else if (pos >= buffer.length) { /* no room left in buffer */ + if (markPos > 0) { /* can throw away early part of the buffer */ + int sz = pos - markPos; + System.arraycopy(buffer, markPos, buffer, 0, sz); + pos = sz; + markPos = 0; + } else if (buffer.length >= markLimit) { + markPos = -1; /* buffer got too big, invalidate mark */ + pos = 0; /* drop buffer contents */ + } else { /* grow buffer */ + int nsz = markLimit + 1; + byte[] nbuf = new byte[nsz]; + System.arraycopy(buffer, 0, nbuf, 0, pos); + buffer = nbuf; + } + } + count = pos; + fill(buffer); + } + + @Override + public void mark(int readlimit) { + markLimit = readlimit; + markPos = pos; + } + + @Override + public boolean markSupported() { + return true; + } + + @Override + public void reset() throws IOException { + if (markPos < 0) throw new IOException("Resetting to invalid mark"); + pos = markPos; + } +} diff --git a/server/src/main/java/org/opensearch/gateway/remote/routingtable/IndexRoutingTableInputStreamReader.java b/server/src/main/java/org/opensearch/gateway/remote/routingtable/IndexRoutingTableInputStreamReader.java new file mode 100644 index 0000000000000..e2b4f5da5f6e9 --- /dev/null +++ b/server/src/main/java/org/opensearch/gateway/remote/routingtable/IndexRoutingTableInputStreamReader.java @@ -0,0 +1,70 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.gateway.remote.routingtable; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.cluster.routing.IndexRoutingTable; +import org.opensearch.cluster.routing.IndexShardRoutingTable; +import org.opensearch.common.io.stream.BufferedChecksumStreamInput; +import org.opensearch.core.common.io.stream.InputStreamStreamInput; +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.core.index.Index; + +import java.io.EOFException; +import java.io.IOException; +import java.io.InputStream; + +public class IndexRoutingTableInputStreamReader { + + private final StreamInput streamInput; + + private static final Logger logger = LogManager.getLogger(IndexRoutingTableInputStreamReader.class); + + public IndexRoutingTableInputStreamReader(InputStream inputStream) throws IOException { + streamInput = new InputStreamStreamInput(inputStream); + } + + public IndexRoutingTable readIndexRoutingTable(Index index) throws IOException { + try { + try (BufferedChecksumStreamInput in = new BufferedChecksumStreamInput(streamInput, "assertion")) { + // Read the Table Header first and confirm the index + IndexRoutingTableHeader indexRoutingTableHeader = IndexRoutingTableHeader.read(in); + assert indexRoutingTableHeader.getIndexName().equals(index.getName()); + + int numberOfShardRouting = in.readVInt(); + logger.debug("Number of Index Routing Table {}", numberOfShardRouting); + IndexRoutingTable.Builder indicesRoutingTable = IndexRoutingTable.builder(index); + for (int idx = 0; idx < numberOfShardRouting; idx++) { + IndexShardRoutingTable indexShardRoutingTable = IndexShardRoutingTable.Builder.readFrom(in); + logger.debug("Index Shard Routing Table reading {}", indexShardRoutingTable); + indicesRoutingTable.addIndexShard(indexShardRoutingTable); + + } + verifyCheckSum(in); + return indicesRoutingTable.build(); + } + } catch (EOFException e) { + throw new IOException("Indices Routing table is corrupted", e); + } + } + + private void verifyCheckSum(BufferedChecksumStreamInput in) throws IOException { + long expectedChecksum = in.getChecksum(); + long readChecksum = in.readLong(); + if (readChecksum != expectedChecksum) { + throw new IOException( + "checksum verification failed - expected: 0x" + + Long.toHexString(expectedChecksum) + + ", got: 0x" + + Long.toHexString(readChecksum) + ); + } + } +} diff --git a/server/src/main/java/org/opensearch/index/remote/RemoteIndexPathUploader.java b/server/src/main/java/org/opensearch/index/remote/RemoteIndexPathUploader.java new file mode 100644 index 0000000000000..ffdc4729d7f21 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/remote/RemoteIndexPathUploader.java @@ -0,0 +1,306 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.remote; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.logging.log4j.message.ParameterizedMessage; +import org.opensearch.action.LatchedActionListener; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.common.UUIDs; +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.common.blobstore.BlobContainer; +import org.opensearch.common.blobstore.BlobPath; +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.core.action.ActionListener; +import org.opensearch.core.index.Index; +import org.opensearch.gateway.remote.IndexMetadataUploadListener; +import static org.opensearch.gateway.remote.RemoteClusterStateUtils.RemoteStateTransferException; +import org.opensearch.index.remote.RemoteStoreEnums.PathType; +import org.opensearch.node.Node; +import org.opensearch.node.remotestore.RemoteStoreNodeAttribute; +import org.opensearch.repositories.RepositoriesService; +import org.opensearch.repositories.Repository; +import org.opensearch.repositories.blobstore.BlobStoreRepository; +import org.opensearch.repositories.blobstore.ConfigBlobStoreFormat; +import org.opensearch.threadpool.ThreadPool; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Objects; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import java.util.function.Supplier; +import java.util.stream.Collectors; + +import static org.opensearch.gateway.remote.RemoteIndexMetadataManager.INDEX_METADATA_UPLOAD_TIMEOUT_SETTING; +import static org.opensearch.index.remote.RemoteIndexPath.COMBINED_PATH; +import static org.opensearch.index.remote.RemoteIndexPath.SEGMENT_PATH; +import static org.opensearch.index.remote.RemoteIndexPath.TRANSLOG_PATH; +import static org.opensearch.index.remote.RemoteStoreUtils.determineRemoteStorePathStrategy; +import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.isRemoteDataAttributePresent; +import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.isRemoteStoreClusterStateEnabled; + +/** + * Uploads the remote store path for all possible combinations of {@link org.opensearch.index.remote.RemoteStoreEnums.DataCategory} + * and {@link org.opensearch.index.remote.RemoteStoreEnums.DataType} for each shard of an index. + * + * @opensearch.internal + */ +@ExperimentalApi +public class RemoteIndexPathUploader extends IndexMetadataUploadListener { + + public static final String DELIMITER = "#"; + public static final ConfigBlobStoreFormat REMOTE_INDEX_PATH_FORMAT = new ConfigBlobStoreFormat<>( + RemoteIndexPath.FILE_NAME_FORMAT + ); + + private static final String TIMEOUT_EXCEPTION_MSG = "Timed out waiting while uploading remote index path file for indexes=%s"; + private static final String UPLOAD_EXCEPTION_MSG = "Exception occurred while uploading remote index paths for indexes=%s"; + static final String TRANSLOG_REPO_NAME_KEY = Node.NODE_ATTRIBUTES.getKey() + + RemoteStoreNodeAttribute.REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY; + static final String SEGMENT_REPO_NAME_KEY = Node.NODE_ATTRIBUTES.getKey() + + RemoteStoreNodeAttribute.REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY; + + private static final Logger logger = LogManager.getLogger(RemoteIndexPathUploader.class); + + private final Settings settings; + private final boolean isRemoteDataAttributePresent; + private final boolean isTranslogSegmentRepoSame; + private final Supplier repositoriesService; + private volatile TimeValue indexMetadataUploadTimeout; + + private BlobStoreRepository translogRepository; + private BlobStoreRepository segmentRepository; + + public RemoteIndexPathUploader( + ThreadPool threadPool, + Settings settings, + Supplier repositoriesService, + ClusterSettings clusterSettings + ) { + super(threadPool, ThreadPool.Names.GENERIC); + this.settings = Objects.requireNonNull(settings); + this.repositoriesService = Objects.requireNonNull(repositoriesService); + isRemoteDataAttributePresent = isRemoteDataAttributePresent(settings); + // If the remote data attributes are not present, then there is no effect of translog and segment being same or different or null. + isTranslogSegmentRepoSame = isTranslogSegmentRepoSame(); + Objects.requireNonNull(clusterSettings); + indexMetadataUploadTimeout = clusterSettings.get(INDEX_METADATA_UPLOAD_TIMEOUT_SETTING); + clusterSettings.addSettingsUpdateConsumer(INDEX_METADATA_UPLOAD_TIMEOUT_SETTING, this::setIndexMetadataUploadTimeout); + } + + @Override + protected void doOnUpload( + List indexMetadataList, + Map prevIndexMetadataByName, + ActionListener actionListener + ) { + if (isRemoteDataAttributePresent == false) { + logger.trace("Skipping beforeNewIndexUpload as there are no remote indexes"); + actionListener.onResponse(null); + return; + } + + long startTime = System.nanoTime(); + boolean success = false; + List eligibleList = indexMetadataList.stream() + .filter(idxMd -> requiresPathUpload(idxMd, prevIndexMetadataByName.get(idxMd.getIndex().getName()))) + .collect(Collectors.toList()); + String indexNames = eligibleList.stream().map(IndexMetadata::getIndex).map(Index::toString).collect(Collectors.joining(",")); + int latchCount = eligibleList.size() * (isTranslogSegmentRepoSame ? 1 : 2); + CountDownLatch latch = new CountDownLatch(latchCount); + List exceptionList = Collections.synchronizedList(new ArrayList<>(latchCount)); + try { + for (IndexMetadata indexMetadata : eligibleList) { + writeIndexPathAsync(indexMetadata, latch, exceptionList); + } + + logger.trace(new ParameterizedMessage("Remote index path upload started for {}", indexNames)); + + try { + if (latch.await(indexMetadataUploadTimeout.millis(), TimeUnit.MILLISECONDS) == false) { + RemoteStateTransferException ex = new RemoteStateTransferException( + String.format(Locale.ROOT, TIMEOUT_EXCEPTION_MSG, indexNames) + ); + exceptionList.forEach(ex::addSuppressed); + actionListener.onFailure(ex); + return; + } + } catch (InterruptedException exception) { + exceptionList.forEach(exception::addSuppressed); + RemoteStateTransferException ex = new RemoteStateTransferException( + String.format(Locale.ROOT, TIMEOUT_EXCEPTION_MSG, indexNames), + exception + ); + actionListener.onFailure(ex); + return; + } + if (exceptionList.size() > 0) { + RemoteStateTransferException ex = new RemoteStateTransferException( + String.format(Locale.ROOT, UPLOAD_EXCEPTION_MSG, indexNames) + ); + exceptionList.forEach(ex::addSuppressed); + actionListener.onFailure(ex); + return; + } + success = true; + actionListener.onResponse(null); + } catch (Exception exception) { + RemoteStateTransferException ex = new RemoteStateTransferException( + String.format(Locale.ROOT, UPLOAD_EXCEPTION_MSG, indexNames), + exception + ); + exceptionList.forEach(ex::addSuppressed); + actionListener.onFailure(ex); + } finally { + long tookTimeNs = System.nanoTime() - startTime; + logger.trace(new ParameterizedMessage("executed beforeNewIndexUpload status={} tookTimeNs={}", success, tookTimeNs)); + } + + } + + private void writeIndexPathAsync(IndexMetadata idxMD, CountDownLatch latch, List exceptionList) { + if (isTranslogSegmentRepoSame) { + // If the repositories are same, then we need to upload a single file containing paths for both translog and segments. + writePathToRemoteStore(idxMD, translogRepository, latch, exceptionList, COMBINED_PATH); + } else { + // If the repositories are different, then we need to upload one file per segment and translog containing their individual + // paths. + writePathToRemoteStore(idxMD, translogRepository, latch, exceptionList, TRANSLOG_PATH); + writePathToRemoteStore(idxMD, segmentRepository, latch, exceptionList, SEGMENT_PATH); + } + } + + private void writePathToRemoteStore( + IndexMetadata idxMD, + BlobStoreRepository repository, + CountDownLatch latch, + List exceptionList, + Map> pathCreationMap + ) { + Map remoteCustomData = idxMD.getCustomData(IndexMetadata.REMOTE_STORE_CUSTOM_KEY); + PathType pathType = PathType.valueOf(remoteCustomData.get(PathType.NAME)); + RemoteStoreEnums.PathHashAlgorithm hashAlgorithm = RemoteStoreEnums.PathHashAlgorithm.valueOf( + remoteCustomData.get(RemoteStoreEnums.PathHashAlgorithm.NAME) + ); + String indexUUID = idxMD.getIndexUUID(); + int shardCount = idxMD.getNumberOfShards(); + BlobPath basePath = repository.basePath(); + BlobContainer blobContainer = repository.blobStore().blobContainer(basePath.add(RemoteIndexPath.DIR)); + ActionListener actionListener = getUploadPathLatchedActionListener(idxMD, latch, exceptionList, pathCreationMap); + try { + RemoteIndexPath remoteIndexPath = new RemoteIndexPath( + indexUUID, + shardCount, + basePath, + pathType, + hashAlgorithm, + pathCreationMap + ); + String fileName = generateFileName(indexUUID, idxMD.getVersion(), remoteIndexPath.getVersion()); + REMOTE_INDEX_PATH_FORMAT.writeAsyncWithUrgentPriority(remoteIndexPath, blobContainer, fileName, actionListener); + } catch (IOException ioException) { + RemoteStateTransferException ex = new RemoteStateTransferException( + String.format(Locale.ROOT, UPLOAD_EXCEPTION_MSG, List.of(idxMD.getIndex().getName())), + ioException + ); + actionListener.onFailure(ex); + } + } + + private Repository validateAndGetRepository(String repoSetting) { + final String repo = settings.get(repoSetting); + assert repo != null : "Remote " + repoSetting + " repository is not configured"; + final Repository repository = repositoriesService.get().repository(repo); + assert repository instanceof BlobStoreRepository : "Repository should be instance of BlobStoreRepository"; + return repository; + } + + public void start() { + assert isRemoteStoreClusterStateEnabled(settings) == true : "Remote cluster state is not enabled"; + if (isRemoteDataAttributePresent == false) { + // If remote store data attributes are not present than we skip this. + return; + } + translogRepository = (BlobStoreRepository) validateAndGetRepository(TRANSLOG_REPO_NAME_KEY); + segmentRepository = (BlobStoreRepository) validateAndGetRepository(SEGMENT_REPO_NAME_KEY); + } + + private boolean isTranslogSegmentRepoSame() { + // TODO - The current comparison checks the repository name. But it is also possible that the repository are same + // by attributes, but different by name. We need to handle this. + String translogRepoName = settings.get(TRANSLOG_REPO_NAME_KEY); + String segmentRepoName = settings.get(SEGMENT_REPO_NAME_KEY); + return Objects.equals(translogRepoName, segmentRepoName); + } + + private LatchedActionListener getUploadPathLatchedActionListener( + IndexMetadata indexMetadata, + CountDownLatch latch, + List exceptionList, + Map> pathCreationMap + ) { + return new LatchedActionListener<>( + ActionListener.wrap( + resp -> logger.trace( + new ParameterizedMessage("Index path uploaded for {} indexMetadata={}", pathCreationMap, indexMetadata) + ), + ex -> { + logger.error( + new ParameterizedMessage( + "Exception during Index path upload for {} indexMetadata={}", + pathCreationMap, + indexMetadata + ), + ex + ); + exceptionList.add(ex); + } + ), + latch + ); + } + + /** + * This method checks if the index metadata has attributes that calls for uploading the index path for remote store + * uploads. It checks if the remote store path type is {@code HASHED_PREFIX} and returns true if so. + */ + private boolean requiresPathUpload(IndexMetadata indexMetadata, IndexMetadata prevIndexMetadata) { + PathType pathType = determineRemoteStorePathStrategy(indexMetadata).getType(); + PathType prevPathType = Objects.nonNull(prevIndexMetadata) ? determineRemoteStorePathStrategy(prevIndexMetadata).getType() : null; + // If previous metadata is null or previous path type is not hashed_prefix, and along with new path type being + // hashed_prefix, then this can mean any of the following - + // 1. This is creation of remote index with hashed_prefix + // 2. We are enabling cluster state for the very first time with multiple indexes having hashed_prefix path type. + // 3. A docrep index is being migrated to being remote store index. + return pathType == PathType.HASHED_PREFIX && (Objects.isNull(prevPathType) || prevPathType != PathType.HASHED_PREFIX); + } + + private void setIndexMetadataUploadTimeout(TimeValue newIndexMetadataUploadTimeout) { + this.indexMetadataUploadTimeout = newIndexMetadataUploadTimeout; + } + + /** + * Creates a file name by combining index uuid, index metadata version and file version. # has been chosen as the + * delimiter since it does not collide with any possible letters in file name. The random base64 uuid is added to + * ensure that the file does not get overwritten. We do check if translog and segment repo are same by name, but + * it is possible that a user configures same repo by different name for translog and segment in which case, this + * will lead to file not being overwritten. + */ + private String generateFileName(String indexUUID, long indexMetadataVersion, String fileVersion) { + return String.join(DELIMITER, indexUUID, Long.toString(indexMetadataVersion), fileVersion, UUIDs.randomBase64UUID()); + } +} diff --git a/server/src/main/java/org/opensearch/index/remote/RemoteStoreEnums.java b/server/src/main/java/org/opensearch/index/remote/RemoteStoreEnums.java new file mode 100644 index 0000000000000..78361079c9176 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/remote/RemoteStoreEnums.java @@ -0,0 +1,307 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.remote; + +import org.apache.logging.log4j.message.ParameterizedMessage; +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.common.annotation.PublicApi; +import org.opensearch.common.blobstore.BlobPath; +import org.opensearch.common.hash.FNV1a; +import org.opensearch.index.remote.RemoteStorePathStrategy.PathInput; + +import java.util.HashMap; +import java.util.Locale; +import java.util.Map; +import java.util.Objects; +import java.util.Set; + +import static java.util.Collections.unmodifiableMap; +import static org.opensearch.index.remote.RemoteStoreEnums.DataType.DATA; +import static org.opensearch.index.remote.RemoteStoreEnums.DataType.METADATA; +import static org.opensearch.index.remote.RemoteStoreUtils.longToCompositeBase64AndBinaryEncoding; +import static org.opensearch.index.remote.RemoteStoreUtils.longToUrlBase64; + +/** + * This class contains the different enums related to remote store like data categories and types, path types + * and hashing algorithm. + * + * @opensearch.api + */ +@ExperimentalApi +public class RemoteStoreEnums { + + /** + * Categories of the data in Remote store. + */ + @PublicApi(since = "2.14.0") + @ExperimentalApi + public enum DataCategory { + SEGMENTS("segments", Set.of(DataType.values())), + TRANSLOG("translog", Set.of(DATA, METADATA)); + + private final String name; + private final Set supportedDataTypes; + + DataCategory(String name, Set supportedDataTypes) { + this.name = name; + this.supportedDataTypes = supportedDataTypes; + } + + public boolean isSupportedDataType(DataType dataType) { + return supportedDataTypes.contains(dataType); + } + + public String getName() { + return name; + } + } + + /** + * Types of data in remote store. + */ + @PublicApi(since = "2.14.0") + @ExperimentalApi + public enum DataType { + DATA("data"), + METADATA("metadata"), + LOCK_FILES("lock_files"); + + private final String name; + + DataType(String name) { + this.name = name; + } + + public String getName() { + return name; + } + } + + /** + * Enumerates the types of remote store paths resolution techniques supported by OpenSearch. + * For more information, see Github issue #12567. + */ + @PublicApi(since = "2.14.0") + @ExperimentalApi + public enum PathType { + FIXED(0) { + @Override + public BlobPath generatePath(PathInput pathInput, PathHashAlgorithm hashAlgorithm) { + assert Objects.isNull(hashAlgorithm) : "hashAlgorithm is expected to be null with fixed remote store path type"; + // Hash algorithm is not used in FIXED path type + return pathInput.basePath() + .add(pathInput.indexUUID()) + .add(pathInput.shardId()) + .add(pathInput.dataCategory().getName()) + .add(pathInput.dataType().getName()); + } + + @Override + boolean requiresHashAlgorithm() { + return false; + } + }, + HASHED_PREFIX(1) { + @Override + public BlobPath generatePath(PathInput pathInput, PathHashAlgorithm hashAlgorithm) { + assert Objects.nonNull(hashAlgorithm) : "hashAlgorithm is expected to be non-null"; + BlobPath path = BlobPath.cleanPath() + .add(hashAlgorithm.hash(pathInput)) + .add(pathInput.basePath()) + .add(pathInput.indexUUID()); + if (pathInput.shardId() != null) { + path.add(pathInput.shardId()); + } + if(pathInput.dataCategory() != null){ + path.add(pathInput.dataCategory().getName()); + } + if(pathInput.dataType() != null ) { + path.add(pathInput.dataType().getName()); + } + return path; + } + + @Override + boolean requiresHashAlgorithm() { + return true; + } + }, + HASHED_INFIX(2) { + @Override + public BlobPath generatePath(PathInput pathInput, PathHashAlgorithm hashAlgorithm) { + assert Objects.nonNull(hashAlgorithm) : "hashAlgorithm is expected to be non-null"; + return pathInput.basePath() + .add(hashAlgorithm.hash(pathInput)) + .add(pathInput.indexUUID()) + .add(pathInput.shardId()) + .add(pathInput.dataCategory().getName()) + .add(pathInput.dataType().getName()); + } + + @Override + boolean requiresHashAlgorithm() { + return true; + } + }; + + private final int code; + + PathType(int code) { + this.code = code; + } + + public int getCode() { + return code; + } + + private static final Map CODE_TO_ENUM; + + static { + PathType[] values = values(); + Map codeToStatus = new HashMap<>(values.length); + for (PathType value : values) { + int code = value.code; + if (codeToStatus.containsKey(code)) { + throw new IllegalStateException( + new ParameterizedMessage("{} has same code as {}", codeToStatus.get(code), value).getFormattedMessage() + ); + } + codeToStatus.put(code, value); + } + CODE_TO_ENUM = unmodifiableMap(codeToStatus); + } + + /** + * Turn a status code into a {@link PathType}. + */ + public static PathType fromCode(int code) { + return CODE_TO_ENUM.get(code); + } + + /** + * This method generates the path for the given path input which constitutes multiple fields and characteristics + * of the data. + * + * @param pathInput input. + * @param hashAlgorithm hashing algorithm. + * @return the blob path for the path input. + */ + public BlobPath path(PathInput pathInput, PathHashAlgorithm hashAlgorithm) { + DataCategory dataCategory = pathInput.dataCategory(); + DataType dataType = pathInput.dataType(); +// assert dataCategory.isSupportedDataType(dataType) : "category:" +// + dataCategory +// + " type:" +// + dataType +// + " are not supported together"; + return generatePath(pathInput, hashAlgorithm); + } + + protected abstract BlobPath generatePath(PathInput pathInput, PathHashAlgorithm hashAlgorithm); + + abstract boolean requiresHashAlgorithm(); + + public static PathType parseString(String pathType) { + try { + return PathType.valueOf(pathType.toUpperCase(Locale.ROOT)); + } catch (IllegalArgumentException | NullPointerException e) { + // IllegalArgumentException is thrown when the input does not match any enum name + // NullPointerException is thrown when the input is null + throw new IllegalArgumentException("Could not parse PathType for [" + pathType + "]"); + } + } + + /** + * This string is used as key for storing information in the custom data in index settings. + */ + public static final String NAME = "path_type"; + + } + + /** + * Type of hashes supported for path types that have hashing. + */ + @PublicApi(since = "2.14.0") + @ExperimentalApi + public enum PathHashAlgorithm { + + FNV_1A_BASE64(0) { + @Override + String hash(PathInput pathInput) { + String input = pathInput.indexUUID() + pathInput.shardId() + (pathInput.dataCategory() != null ? pathInput.dataCategory().getName(): "" )+ (pathInput.dataType()!= null ? pathInput.dataType().getName(): ""); + long hash = FNV1a.hash64(input); + return longToUrlBase64(hash); + } + }, + /** + * This hash algorithm will generate a hash value which will use 1st 6 bits to create bas64 character and next 14 + * bits to create binary string. + */ + FNV_1A_COMPOSITE_1(1) { + @Override + String hash(PathInput pathInput) { + String input = pathInput.indexUUID() + pathInput.shardId() + pathInput.dataCategory().getName() + pathInput.dataType() + .getName(); + long hash = FNV1a.hash64(input); + return longToCompositeBase64AndBinaryEncoding(hash, 20); + } + }; + + private final int code; + + PathHashAlgorithm(int code) { + this.code = code; + } + + public int getCode() { + return code; + } + + private static final Map CODE_TO_ENUM; + + static { + PathHashAlgorithm[] values = values(); + Map codeToStatus = new HashMap<>(values.length); + for (PathHashAlgorithm value : values) { + int code = value.code; + if (codeToStatus.containsKey(code)) { + throw new IllegalStateException( + new ParameterizedMessage("{} has same code as {}", codeToStatus.get(code), value).getFormattedMessage() + ); + } + codeToStatus.put(code, value); + } + CODE_TO_ENUM = unmodifiableMap(codeToStatus); + } + + /** + * Turn a status code into a {@link PathHashAlgorithm}. + */ + public static PathHashAlgorithm fromCode(int code) { + return CODE_TO_ENUM.get(code); + } + + abstract String hash(PathInput pathInput); + + public static PathHashAlgorithm parseString(String pathHashAlgorithm) { + try { + return PathHashAlgorithm.valueOf(pathHashAlgorithm.toUpperCase(Locale.ROOT)); + } catch (IllegalArgumentException | NullPointerException e) { + // IllegalArgumentException is thrown when the input does not match any enum name + // NullPointerException is thrown when the input is null + throw new IllegalArgumentException("Could not parse PathHashAlgorithm for [" + pathHashAlgorithm + "]"); + } + } + + /** + * This string is used as key for storing information in the custom data in index settings. + */ + public static final String NAME = "path_hash_algorithm"; + } +} diff --git a/server/src/main/java/org/opensearch/index/remote/RemoteStorePathStrategy.java b/server/src/main/java/org/opensearch/index/remote/RemoteStorePathStrategy.java new file mode 100644 index 0000000000000..b5f28e4b6fdee --- /dev/null +++ b/server/src/main/java/org/opensearch/index/remote/RemoteStorePathStrategy.java @@ -0,0 +1,170 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.remote; + +import org.apache.logging.log4j.message.ParameterizedMessage; +import org.opensearch.common.Nullable; +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.common.annotation.PublicApi; +import org.opensearch.common.blobstore.BlobPath; +import org.opensearch.index.remote.RemoteStoreEnums.DataCategory; +import org.opensearch.index.remote.RemoteStoreEnums.DataType; +import org.opensearch.index.remote.RemoteStoreEnums.PathHashAlgorithm; +import org.opensearch.index.remote.RemoteStoreEnums.PathType; + +import java.util.Objects; + +/** + * This class wraps internal details on the remote store path for an index. + * + * @opensearch.internal + */ +@PublicApi(since = "2.14.0") +@ExperimentalApi +public class RemoteStorePathStrategy { + + private final PathType type; + + @Nullable + private final PathHashAlgorithm hashAlgorithm; + + public RemoteStorePathStrategy(PathType type) { + this(type, null); + } + + public RemoteStorePathStrategy(PathType type, PathHashAlgorithm hashAlgorithm) { + Objects.requireNonNull(type, "pathType can not be null"); + if (isCompatible(type, hashAlgorithm) == false) { + throw new IllegalArgumentException( + new ParameterizedMessage("pathType={} pathHashAlgorithm={} are incompatible", type, hashAlgorithm).getFormattedMessage() + ); + } + this.type = type; + this.hashAlgorithm = hashAlgorithm; + } + + public static boolean isCompatible(PathType type, PathHashAlgorithm hashAlgorithm) { + return (type.requiresHashAlgorithm() == false && Objects.isNull(hashAlgorithm)) + || (type.requiresHashAlgorithm() && Objects.nonNull(hashAlgorithm)); + } + + public PathType getType() { + return type; + } + + public PathHashAlgorithm getHashAlgorithm() { + return hashAlgorithm; + } + + @Override + public String toString() { + return "RemoteStorePathStrategy{" + "type=" + type + ", hashAlgorithm=" + hashAlgorithm + '}'; + } + + public BlobPath generatePath(PathInput pathInput) { + return type.path(pathInput, hashAlgorithm); + } + + /** + * Wrapper class for the input required to generate path for remote store uploads. + * @opensearch.internal + */ + @PublicApi(since = "2.14.0") + @ExperimentalApi + public static class PathInput { + private final BlobPath basePath; + private final String indexUUID; + private final String shardId; + private final DataCategory dataCategory; + private final DataType dataType; + + public PathInput(BlobPath basePath, String indexUUID, String shardId, DataCategory dataCategory, DataType dataType) { + this.basePath = Objects.requireNonNull(basePath); + this.indexUUID = Objects.requireNonNull(indexUUID); +// this.shardId = Objects.requireNonNull(shardId); +// this.dataCategory = Objects.requireNonNull(dataCategory); +// this.dataType = Objects.requireNonNull(dataType); + this.shardId =(shardId); + this.dataCategory = (dataCategory); + this.dataType = (dataType); + } + + BlobPath basePath() { + return basePath; + } + + String indexUUID() { + return indexUUID; + } + + String shardId() { + return shardId; + } + + DataCategory dataCategory() { + return dataCategory; + } + + DataType dataType() { + return dataType; + } + + /** + * Returns a new builder for {@link PathInput}. + */ + public static Builder builder() { + return new Builder(); + } + + /** + * Builder for {@link PathInput}. + * + * @opensearch.internal + */ + @PublicApi(since = "2.14.0") + @ExperimentalApi + public static class Builder { + private BlobPath basePath; + private String indexUUID; + private String shardId; + private DataCategory dataCategory; + private DataType dataType; + + public Builder basePath(BlobPath basePath) { + this.basePath = basePath; + return this; + } + + public Builder indexUUID(String indexUUID) { + this.indexUUID = indexUUID; + return this; + } + + public Builder shardId(String shardId) { + this.shardId = shardId; + return this; + } + + public Builder dataCategory(DataCategory dataCategory) { + this.dataCategory = dataCategory; + return this; + } + + public Builder dataType(DataType dataType) { + this.dataType = dataType; + return this; + } + + public PathInput build() { + return new PathInput(basePath, indexUUID, shardId, dataCategory, dataType); + } + } + } + +} diff --git a/server/src/main/java/org/opensearch/index/translog/BaseTranslogReader.java b/server/src/main/java/org/opensearch/index/translog/BaseTranslogReader.java index d6fa2a2e53de3..9088eb6b20fb8 100644 --- a/server/src/main/java/org/opensearch/index/translog/BaseTranslogReader.java +++ b/server/src/main/java/org/opensearch/index/translog/BaseTranslogReader.java @@ -32,6 +32,7 @@ package org.opensearch.index.translog; +import org.opensearch.common.io.stream.BufferedChecksumStreamInput; import org.opensearch.core.common.io.stream.ByteBufferStreamInput; import org.opensearch.index.seqno.SequenceNumbers; diff --git a/server/src/main/java/org/opensearch/index/translog/Translog.java b/server/src/main/java/org/opensearch/index/translog/Translog.java index 9f877e87415dd..eab0528598841 100644 --- a/server/src/main/java/org/opensearch/index/translog/Translog.java +++ b/server/src/main/java/org/opensearch/index/translog/Translog.java @@ -38,6 +38,8 @@ import org.opensearch.common.Nullable; import org.opensearch.common.UUIDs; import org.opensearch.common.annotation.PublicApi; +import org.opensearch.common.io.stream.BufferedChecksumStreamInput; +import org.opensearch.common.io.stream.BufferedChecksumStreamOutput; import org.opensearch.common.io.stream.ReleasableBytesStreamOutput; import org.opensearch.common.lease.Releasable; import org.opensearch.common.lease.Releasables; diff --git a/server/src/main/java/org/opensearch/index/translog/TranslogHeader.java b/server/src/main/java/org/opensearch/index/translog/TranslogHeader.java index 7b5be9505f27a..c622214a3a69d 100644 --- a/server/src/main/java/org/opensearch/index/translog/TranslogHeader.java +++ b/server/src/main/java/org/opensearch/index/translog/TranslogHeader.java @@ -40,6 +40,8 @@ import org.apache.lucene.store.OutputStreamDataOutput; import org.apache.lucene.util.BytesRef; import org.opensearch.common.io.Channels; +import org.opensearch.common.io.stream.BufferedChecksumStreamInput; +import org.opensearch.common.io.stream.BufferedChecksumStreamOutput; import org.opensearch.core.common.io.stream.InputStreamStreamInput; import org.opensearch.core.common.io.stream.OutputStreamStreamOutput; import org.opensearch.core.common.io.stream.StreamInput; diff --git a/server/src/main/java/org/opensearch/index/translog/TranslogSnapshot.java b/server/src/main/java/org/opensearch/index/translog/TranslogSnapshot.java index 89718156cbbe8..a6e322bf58fff 100644 --- a/server/src/main/java/org/opensearch/index/translog/TranslogSnapshot.java +++ b/server/src/main/java/org/opensearch/index/translog/TranslogSnapshot.java @@ -32,6 +32,7 @@ package org.opensearch.index.translog; import org.opensearch.common.io.Channels; +import org.opensearch.common.io.stream.BufferedChecksumStreamInput; import org.opensearch.index.seqno.SequenceNumbers; import java.io.EOFException; diff --git a/server/src/main/java/org/opensearch/index/translog/TranslogWriter.java b/server/src/main/java/org/opensearch/index/translog/TranslogWriter.java index 86f7567f3333d..da3c7a8dee219 100644 --- a/server/src/main/java/org/opensearch/index/translog/TranslogWriter.java +++ b/server/src/main/java/org/opensearch/index/translog/TranslogWriter.java @@ -42,6 +42,7 @@ import org.opensearch.common.collect.Tuple; import org.opensearch.common.io.Channels; import org.opensearch.common.io.DiskIoBufferPool; +import org.opensearch.common.io.stream.BufferedChecksumStreamInput; import org.opensearch.common.io.stream.ReleasableBytesStreamOutput; import org.opensearch.common.lease.Releasables; import org.opensearch.common.util.BigArrays; diff --git a/server/src/main/java/org/opensearch/index/translog/transfer/BlobStoreTransferService.java b/server/src/main/java/org/opensearch/index/translog/transfer/BlobStoreTransferService.java index 82dd6301ef79f..a1f5d6aa96755 100644 --- a/server/src/main/java/org/opensearch/index/translog/transfer/BlobStoreTransferService.java +++ b/server/src/main/java/org/opensearch/index/translog/transfer/BlobStoreTransferService.java @@ -11,6 +11,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.message.ParameterizedMessage; +import org.apache.lucene.store.IndexInput; import org.opensearch.action.ActionRunnable; import org.opensearch.common.blobstore.AsyncMultiStreamBlobContainer; import org.opensearch.common.blobstore.BlobContainer; @@ -21,7 +22,11 @@ import org.opensearch.common.blobstore.stream.write.WritePriority; import org.opensearch.common.blobstore.transfer.RemoteTransferContainer; import org.opensearch.common.blobstore.transfer.stream.OffsetRangeFileInputStream; +import org.opensearch.common.blobstore.transfer.stream.OffsetRangeIndexInputStream; +import org.opensearch.common.lucene.store.ByteArrayIndexInput; import org.opensearch.core.action.ActionListener; +import org.opensearch.core.common.bytes.BytesReference; +import org.opensearch.index.store.exception.ChecksumCombinationException; import org.opensearch.index.translog.ChannelFactory; import org.opensearch.index.translog.transfer.FileSnapshot.TransferFileSnapshot; import org.opensearch.threadpool.ThreadPool; @@ -36,6 +41,7 @@ import java.util.Set; import static org.opensearch.common.blobstore.BlobContainer.BlobNameSortOrder.LEXICOGRAPHIC; +import static org.opensearch.common.blobstore.transfer.RemoteTransferContainer.checksumOfChecksum; /** * Service that handles remote transfer of translog and checkpoint files @@ -102,6 +108,48 @@ public void uploadBlobs( } + @Override + public void uploadBlob(InputStream inputStream, Iterable remotePath, String blobName, WritePriority writePriority, ActionListener listener) throws IOException { + assert remotePath instanceof BlobPath; + BlobPath blobPath = (BlobPath) remotePath; + final BlobContainer blobContainer = blobStore.blobContainer(blobPath); + if (blobContainer instanceof AsyncMultiStreamBlobContainer == false) { + blobContainer.writeBlob(blobName, inputStream, inputStream.available(), false); + listener.onResponse(null); + return; + } + final String resourceDescription = "BlobStoreTransferService.uploadBlob(blob=\"" + blobName + "\")"; + byte[] bytes = inputStream.readAllBytes(); + try (IndexInput input = new ByteArrayIndexInput(resourceDescription, bytes)) { + long expectedChecksum; + try { + expectedChecksum = checksumOfChecksum(input.clone(), 8); + } catch (Exception e) { + throw new ChecksumCombinationException( + "Potentially corrupted file: Checksum combination failed while combining stored checksum " + + "and calculated checksum of stored checksum", + resourceDescription, + e + ); + } + + try ( + RemoteTransferContainer remoteTransferContainer = new RemoteTransferContainer( + blobName, + blobName, + bytes.length, + true, + writePriority, + (size, position) -> new OffsetRangeIndexInputStream(input, size, position), + expectedChecksum, + ((AsyncMultiStreamBlobContainer) blobContainer).remoteIntegrityCheckSupported() + ) + ) { + ((AsyncMultiStreamBlobContainer) blobContainer).asyncBlobUpload(remoteTransferContainer.createWriteContext(), listener); + } + } + } + private void uploadBlob( TransferFileSnapshot fileSnapshot, ActionListener listener, diff --git a/server/src/main/java/org/opensearch/index/translog/transfer/TransferService.java b/server/src/main/java/org/opensearch/index/translog/transfer/TransferService.java index cfe833dde87eb..40527c04742c8 100644 --- a/server/src/main/java/org/opensearch/index/translog/transfer/TransferService.java +++ b/server/src/main/java/org/opensearch/index/translog/transfer/TransferService.java @@ -12,6 +12,7 @@ import org.opensearch.common.blobstore.BlobPath; import org.opensearch.common.blobstore.stream.write.WritePriority; import org.opensearch.core.action.ActionListener; +import org.opensearch.core.common.bytes.BytesReference; import org.opensearch.index.translog.transfer.FileSnapshot.TransferFileSnapshot; import java.io.IOException; @@ -63,6 +64,7 @@ void uploadBlobs( * @throws IOException the exception while transferring the data */ void uploadBlob(final TransferFileSnapshot fileSnapshot, Iterable remotePath, WritePriority writePriority) throws IOException; + void uploadBlob(InputStream inputStream, Iterable remotePath, String blobName, WritePriority writePriority, ActionListener listener) throws IOException; void deleteBlobs(Iterable path, List fileNames) throws IOException; diff --git a/server/src/main/java/org/opensearch/node/Node.java b/server/src/main/java/org/opensearch/node/Node.java index 7fc057825d7d2..afc82cc5aaa6d 100644 --- a/server/src/main/java/org/opensearch/node/Node.java +++ b/server/src/main/java/org/opensearch/node/Node.java @@ -135,6 +135,7 @@ import org.opensearch.gateway.GatewayService; import org.opensearch.gateway.MetaStateService; import org.opensearch.gateway.PersistedClusterStateService; +import org.opensearch.gateway.remote.RemoteClusterStateCleanupManager; import org.opensearch.gateway.remote.RemoteClusterStateService; import org.opensearch.http.HttpServerTransport; import org.opensearch.identity.IdentityService; @@ -145,6 +146,7 @@ import org.opensearch.index.analysis.AnalysisRegistry; import org.opensearch.index.engine.EngineFactory; import org.opensearch.index.recovery.RemoteStoreRestoreService; +import org.opensearch.index.remote.RemoteIndexPathUploader; import org.opensearch.index.remote.RemoteStoreStatsTrackerFactory; import org.opensearch.index.store.RemoteSegmentStoreDirectoryFactory; import org.opensearch.index.store.remote.filecache.FileCache; @@ -201,7 +203,7 @@ import org.opensearch.plugins.ScriptPlugin; import org.opensearch.plugins.SearchPipelinePlugin; import org.opensearch.plugins.SearchPlugin; -import org.opensearch.plugins.SecureTransportSettingsProvider; +import org.opensearch.plugins.SecureSettingsFactory; import org.opensearch.plugins.SystemIndexPlugin; import org.opensearch.plugins.TelemetryPlugin; import org.opensearch.ratelimitting.admissioncontrol.AdmissionControlService; @@ -258,7 +260,7 @@ import java.io.IOException; import java.net.InetAddress; import java.net.InetSocketAddress; -import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.StandardCopyOption; @@ -724,17 +726,29 @@ protected Node( threadPool::relativeTimeInMillis ); final RemoteClusterStateService remoteClusterStateService; + final RemoteClusterStateCleanupManager remoteClusterStateCleanupManager; + final RemoteIndexPathUploader remoteIndexPathUploader; if (isRemoteStoreClusterStateEnabled(settings)) { + remoteIndexPathUploader = new RemoteIndexPathUploader( + threadPool, + settings, + repositoriesServiceReference::get, + clusterService.getClusterSettings() + ); remoteClusterStateService = new RemoteClusterStateService( nodeEnvironment.nodeId(), repositoriesServiceReference::get, settings, - clusterService.getClusterSettings(), + clusterService, threadPool::preciseRelativeTimeInNanos, - threadPool + threadPool, + List.of(remoteIndexPathUploader) ); + remoteClusterStateCleanupManager = remoteClusterStateService.getCleanupManager(); } else { remoteClusterStateService = null; + remoteIndexPathUploader = null; + remoteClusterStateCleanupManager = null; } // collect engine factory providers from plugins @@ -787,6 +801,8 @@ protected Node( final RecoverySettings recoverySettings = new RecoverySettings(settings, settingsModule.getClusterSettings()); + final RemoteStoreSettings remoteStoreSettings = new RemoteStoreSettings(settings, settingsModule.getClusterSettings()); + final IndexStorePlugin.DirectoryFactory remoteDirectoryFactory = new RemoteSegmentStoreDirectoryFactory( repositoriesServiceReference::get, threadPool @@ -824,7 +840,8 @@ protected Node( searchRequestStats, remoteStoreStatsTrackerFactory, recoverySettings, - cacheService + cacheService, + remoteStoreSettings ); final IngestService ingestService = new IngestService( @@ -858,7 +875,8 @@ protected Node( xContentRegistry, systemIndices, forbidPrivateIndexSettings, - awarenessReplicaBalance + awarenessReplicaBalance, + remoteStoreSettings ); pluginsService.filterPlugins(Plugin.class) .forEach( @@ -1125,7 +1143,8 @@ protected Node( rerouteService, fsHealthService, persistedStateRegistry, - remoteStoreNodeService + remoteStoreNodeService, + remoteClusterStateService ); final SearchPipelineService searchPipelineService = new SearchPipelineService( clusterService, @@ -1303,6 +1322,8 @@ protected Node( b.bind(SearchRequestSlowLog.class).toInstance(searchRequestSlowLog); b.bind(MetricsRegistry.class).toInstance(metricsRegistry); b.bind(RemoteClusterStateService.class).toProvider(() -> remoteClusterStateService); + b.bind(RemoteIndexPathUploader.class).toProvider(() -> remoteIndexPathUploader); + b.bind(RemoteClusterStateCleanupManager.class).toProvider(() -> remoteClusterStateCleanupManager); b.bind(PersistedStateRegistry.class).toInstance(persistedStateRegistry); b.bind(SegmentReplicationStatsTracker.class).toInstance(segmentReplicationStatsTracker); b.bind(SearchRequestOperationsCompositeListenerFactory.class).toInstance(searchRequestOperationsCompositeListenerFactory); @@ -1452,6 +1473,10 @@ public Node start() throws NodeValidationException { if (remoteClusterStateService != null) { remoteClusterStateService.start(); } + final RemoteIndexPathUploader remoteIndexPathUploader = injector.getInstance(RemoteIndexPathUploader.class); + if (remoteIndexPathUploader != null) { + remoteIndexPathUploader.start(); + } // Load (and maybe upgrade) the metadata stored on disk final GatewayMetaState gatewayMetaState = injector.getInstance(GatewayMetaState.class); gatewayMetaState.start( diff --git a/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeAttribute.java b/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeAttribute.java index 7575c6ff5fb34..c150386a8f610 100644 --- a/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeAttribute.java +++ b/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeAttribute.java @@ -12,7 +12,9 @@ import org.opensearch.cluster.metadata.RepositoriesMetadata; import org.opensearch.cluster.metadata.RepositoryMetadata; import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.routing.remote.RemoteRoutingTableService; import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.FeatureFlags; import org.opensearch.gateway.remote.RemoteClusterStateService; import org.opensearch.node.Node; import org.opensearch.repositories.blobstore.BlobStoreRepository; @@ -27,6 +29,8 @@ import java.util.Set; import java.util.stream.Collectors; +import static org.opensearch.common.util.FeatureFlags.REMOTE_ROUTING_TABLE_EXPERIMENTAL; + /** * This is an abstraction for validating and storing information specific to remote backed storage nodes. * @@ -45,6 +49,8 @@ public class RemoteStoreNodeAttribute { + "." + CryptoMetadata.SETTINGS_KEY; public static final String REMOTE_STORE_REPOSITORY_SETTINGS_ATTRIBUTE_KEY_PREFIX = "remote_store.repository.%s.settings."; + public static final String REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY = "remote_store.routing.repository"; + private final RepositoriesMetadata repositoriesMetadata; /** @@ -103,11 +109,11 @@ private Map validateSettingsAttributesNonNull(DiscoveryNode node .filter(key -> key.startsWith(settingsAttributeKeyPrefix)) .collect(Collectors.toMap(key -> key.replace(settingsAttributeKeyPrefix, ""), key -> validateAttributeNonNull(node, key))); - if (settingsMap.isEmpty()) { - throw new IllegalStateException( - "joining node [" + node + "] doesn't have settings attribute for [" + repositoryName + "] repository" - ); - } +// if (settingsMap.isEmpty()) { +// throw new IllegalStateException( +// "joining node [" + node + "] doesn't have settings attribute for [" + repositoryName + "] repository" +// ); +// } return settingsMap; } @@ -151,6 +157,10 @@ private Set getValidatedRepositoryNames(DiscoveryNode node) { } else if (node.getAttributes().containsKey(REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY)) { repositoryNames.add(validateAttributeNonNull(node, REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY)); } + if (node.getAttributes().containsKey(REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY)){ + repositoryNames.add(validateAttributeNonNull(node, REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY)); + } + return repositoryNames; } @@ -173,6 +183,16 @@ public static boolean isRemoteStoreClusterStateEnabled(Settings settings) { && isRemoteClusterStateAttributePresent(settings); } + public static boolean isRemoteRoutingTableAttributePresent(Settings settings) { + return settings.getByPrefix(Node.NODE_ATTRIBUTES.getKey() + REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY) + .isEmpty() == false; + } + + public static boolean isRemoteRoutingTableEnabled(Settings settings) { + return FeatureFlags.isEnabled(REMOTE_ROUTING_TABLE_EXPERIMENTAL) && RemoteRoutingTableService.REMOTE_ROUTING_TABLE_ENABLED_SETTING.get(settings) + && isRemoteRoutingTableAttributePresent(settings); + } + public RepositoriesMetadata getRepositoriesMetadata() { return this.repositoriesMetadata; } diff --git a/server/src/main/java/org/opensearch/repositories/blobstore/ChecksumBlobStoreFormat.java b/server/src/main/java/org/opensearch/repositories/blobstore/ChecksumBlobStoreFormat.java index 3e6052a5ef820..78b8fbeb45852 100644 --- a/server/src/main/java/org/opensearch/repositories/blobstore/ChecksumBlobStoreFormat.java +++ b/server/src/main/java/org/opensearch/repositories/blobstore/ChecksumBlobStoreFormat.java @@ -31,6 +31,7 @@ package org.opensearch.repositories.blobstore; +import java.util.concurrent.CompletableFuture; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexFormatTooNewException; @@ -38,40 +39,44 @@ import org.apache.lucene.store.ByteBuffersDataInput; import org.apache.lucene.store.ByteBuffersIndexInput; import org.apache.lucene.store.IndexInput; -import org.apache.lucene.store.OutputStreamIndexOutput; import org.apache.lucene.util.BytesRef; import org.opensearch.cluster.metadata.Metadata; import org.opensearch.common.CheckedFunction; import org.opensearch.common.blobstore.AsyncMultiStreamBlobContainer; import org.opensearch.common.blobstore.BlobContainer; +import org.opensearch.common.blobstore.stream.read.ReadContext; import org.opensearch.common.blobstore.stream.write.WritePriority; import org.opensearch.common.blobstore.transfer.RemoteTransferContainer; import org.opensearch.common.blobstore.transfer.stream.OffsetRangeIndexInputStream; +import org.opensearch.common.io.InputStreamContainer; import org.opensearch.common.io.Streams; -import org.opensearch.common.io.stream.BytesStreamOutput; import org.opensearch.common.lucene.store.ByteArrayIndexInput; -import org.opensearch.common.lucene.store.IndexOutputOutputStream; import org.opensearch.common.xcontent.LoggingDeprecationHandler; import org.opensearch.common.xcontent.XContentHelper; import org.opensearch.common.xcontent.XContentType; import org.opensearch.core.action.ActionListener; import org.opensearch.core.common.bytes.BytesReference; import org.opensearch.core.compress.Compressor; -import org.opensearch.core.xcontent.MediaTypeRegistry; import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.core.xcontent.ToXContent; -import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.core.xcontent.XContentParser; import org.opensearch.gateway.CorruptStateException; import org.opensearch.index.store.exception.ChecksumCombinationException; import org.opensearch.snapshots.SnapshotInfo; +import org.opensearch.threadpool.ThreadPool; import java.io.IOException; -import java.io.OutputStream; +import java.io.InputStream; +import java.io.SequenceInputStream; +import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; +import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.stream.Collectors; import static org.opensearch.common.blobstore.transfer.RemoteTransferContainer.checksumOfChecksum; @@ -80,7 +85,7 @@ * * @opensearch.internal */ -public final class ChecksumBlobStoreFormat { +public final class ChecksumBlobStoreFormat extends BaseBlobStoreFormat { // Serialization parameters to specify correct context for metadata serialization public static final ToXContent.Params SNAPSHOT_ONLY_FORMAT_PARAMS; @@ -98,12 +103,8 @@ public final class ChecksumBlobStoreFormat { // The format version public static final int VERSION = 1; - private static final int BUFFER_SIZE = 4096; - private final String codec; - private final String blobNameFormat; - private final CheckedFunction reader; /** @@ -112,8 +113,8 @@ public final class ChecksumBlobStoreFormat { * @param reader prototype object that can deserialize T from XContent */ public ChecksumBlobStoreFormat(String codec, String blobNameFormat, CheckedFunction reader) { + super(blobNameFormat, false); this.reader = reader; - this.blobNameFormat = blobNameFormat; this.codec = codec; } @@ -124,13 +125,22 @@ public ChecksumBlobStoreFormat(String codec, String blobNameFormat, CheckedFunct * @param name name to be translated into * @return parsed blob object */ - public T read(BlobContainer blobContainer, String name, NamedXContentRegistry namedXContentRegistry) throws IOException { - String blobName = blobName(name); + public T read(BlobContainer blobContainer, String name, NamedXContentRegistry namedXContentRegistry) throws IOException {String blobName = blobName(name); return deserialize(blobName, namedXContentRegistry, Streams.readFully(blobContainer.readBlob(blobName))); } + public void readAsync(BlobContainer blobContainer, String name, NamedXContentRegistry namedXContentRegistry, ExecutorService executorService, ActionListener listener) throws IOException { + executorService.execute(() -> { + try { + listener.onResponse(read(blobContainer, name, namedXContentRegistry)); + } catch (Exception e) { + listener.onFailure(e); + } + }); + } + public String blobName(String name) { - return String.format(Locale.ROOT, blobNameFormat, name); + return String.format(Locale.ROOT, getBlobNameFormat(), name); } public T deserialize(String blobName, NamedXContentRegistry namedXContentRegistry, BytesReference bytes) throws IOException { @@ -170,30 +180,7 @@ public T deserialize(String blobName, NamedXContentRegistry namedXContentRegistr * @param compressor whether to use compression */ public void write(final T obj, final BlobContainer blobContainer, final String name, final Compressor compressor) throws IOException { - write(obj, blobContainer, name, compressor, SNAPSHOT_ONLY_FORMAT_PARAMS); - } - - /** - * Writes blob with resolving the blob name using {@link #blobName} method. - *

- * The blob will optionally by compressed. - * - * @param obj object to be serialized - * @param blobContainer blob container - * @param name blob name - * @param compressor whether to use compression - * @param params ToXContent params - */ - public void write( - final T obj, - final BlobContainer blobContainer, - final String name, - final Compressor compressor, - final ToXContent.Params params - ) throws IOException { - final String blobName = blobName(name); - final BytesReference bytes = serialize(obj, blobName, compressor, params); - blobContainer.writeBlob(blobName, bytes.streamInput(), bytes.length(), false); + write(obj, blobContainer, name, compressor, SNAPSHOT_ONLY_FORMAT_PARAMS, XContentType.SMILE, codec, VERSION); } /** @@ -251,7 +238,7 @@ private void writeAsyncWithPriority( final ToXContent.Params params ) throws IOException { if (blobContainer instanceof AsyncMultiStreamBlobContainer == false) { - write(obj, blobContainer, name, compressor, params); + write(obj, blobContainer, name, compressor, params, XContentType.SMILE, codec, VERSION); listener.onResponse(null); return; } @@ -290,35 +277,6 @@ private void writeAsyncWithPriority( public BytesReference serialize(final T obj, final String blobName, final Compressor compressor, final ToXContent.Params params) throws IOException { - try (BytesStreamOutput outputStream = new BytesStreamOutput()) { - try ( - OutputStreamIndexOutput indexOutput = new OutputStreamIndexOutput( - "ChecksumBlobStoreFormat.writeBlob(blob=\"" + blobName + "\")", - blobName, - outputStream, - BUFFER_SIZE - ) - ) { - CodecUtil.writeHeader(indexOutput, codec, VERSION); - try (OutputStream indexOutputOutputStream = new IndexOutputOutputStream(indexOutput) { - @Override - public void close() throws IOException { - // this is important since some of the XContentBuilders write bytes on close. - // in order to write the footer we need to prevent closing the actual index input. - } - }; - XContentBuilder builder = MediaTypeRegistry.contentBuilder( - XContentType.SMILE, - compressor.threadLocalOutputStream(indexOutputOutputStream) - ) - ) { - builder.startObject(); - obj.toXContent(builder, params); - builder.endObject(); - } - CodecUtil.writeFooter(indexOutput); - } - return outputStream.bytes(); - } + return serialize(obj, blobName, compressor, params, XContentType.SMILE, codec, VERSION); } } diff --git a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java index e9bbf3d861408..3ac880954ffcd 100644 --- a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java +++ b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java @@ -2549,7 +2549,8 @@ public void start(ClusterState initialState) { ElectionStrategy.DEFAULT_INSTANCE, () -> new StatusInfo(HEALTHY, "healthy-info"), persistedStateRegistry, - remoteStoreNodeService + remoteStoreNodeService, + null ); clusterManagerService.setClusterStatePublisher(coordinator); coordinator.start(); diff --git a/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java b/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java index 0754cc1793dc8..0cf9858be7632 100644 --- a/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java +++ b/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java @@ -1180,7 +1180,8 @@ protected Optional getDisruptableMockTransport(Transpo getElectionStrategy(), nodeHealthService, persistedStateRegistry, - remoteStoreNodeService + remoteStoreNodeService, + null ); clusterManagerService.setClusterStatePublisher(coordinator); final GatewayService gatewayService = new GatewayService(