Skip to content

Commit

Permalink
Support iceberg register_table procedure to register hadoop tables
Browse files Browse the repository at this point in the history
Co-authored-by: Marius Grama <[email protected]>
  • Loading branch information
2 people authored and findepi committed Jul 12, 2023
1 parent 435470e commit 8bb53d6
Show file tree
Hide file tree
Showing 3 changed files with 90 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,11 @@ public final class IcebergUtil
// - 00001-409702ba-4735-4645-8f14-09537cc0b2c8.gz.metadata.json (https://github.com/apache/iceberg/blob/ab398a0d5ff195f763f8c7a4358ac98fa38a8de7/core/src/main/java/org/apache/iceberg/TableMetadataParser.java#L141)
// - 00001-409702ba-4735-4645-8f14-09537cc0b2c8.metadata.json.gz (https://github.com/apache/iceberg/blob/ab398a0d5ff195f763f8c7a4358ac98fa38a8de7/core/src/main/java/org/apache/iceberg/TableMetadataParser.java#L146)
private static final Pattern METADATA_FILE_NAME_PATTERN = Pattern.compile("(?<version>\\d+)-(?<uuid>[-a-fA-F0-9]*)(?<compression>\\.[a-zA-Z0-9]+)?" + Pattern.quote(METADATA_FILE_EXTENSION) + "(?<compression2>\\.[a-zA-Z0-9]+)?");
// Hadoop Generated Metadata file name examples
// - v0.metadata.json
// - v0.gz.metadata.json
// - v0.metadata.json.gz
private static final Pattern HADOOP_GENERATED_METADATA_FILE_NAME_PATTERN = Pattern.compile("v(?<version>\\d+)(?<compression>\\.[a-zA-Z0-9]+)?" + Pattern.quote(METADATA_FILE_EXTENSION) + "(?<compression2>\\.[a-zA-Z0-9]+)?");

private IcebergUtil() {}

Expand Down Expand Up @@ -716,6 +721,10 @@ public static int parseVersion(String metadataFileName)
if (matcher.matches()) {
return parseInt(matcher.group("version"));
}
matcher = HADOOP_GENERATED_METADATA_FILE_NAME_PATTERN.matcher(metadataFileName);
if (matcher.matches()) {
return parseInt(matcher.group("version"));
}
throw new TrinoException(ICEBERG_BAD_DATA, "Invalid metadata file name: " + metadataFileName);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
*/
package io.trino.plugin.iceberg;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import io.trino.filesystem.FileEntry;
import io.trino.filesystem.FileIterator;
Expand All @@ -22,6 +23,15 @@
import io.trino.testing.AbstractTestQueryFramework;
import io.trino.testing.MaterializedResult;
import io.trino.testing.QueryRunner;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.DataFiles;
import org.apache.iceberg.FileFormat;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Schema;
import org.apache.iceberg.SortOrder;
import org.apache.iceberg.Table;
import org.apache.iceberg.hadoop.HadoopTables;
import org.apache.iceberg.types.Types;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
Expand All @@ -38,6 +48,7 @@
import static com.google.common.base.Verify.verify;
import static com.google.common.io.MoreFiles.deleteRecursively;
import static com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE;
import static io.trino.hadoop.ConfigurationInstantiator.newEmptyConfiguration;
import static io.trino.plugin.hive.metastore.file.TestingFileHiveMetastore.createTestingFileHiveMetastore;
import static io.trino.plugin.iceberg.IcebergTestUtils.getFileSystemFactory;
import static io.trino.plugin.iceberg.IcebergUtil.METADATA_FOLDER_NAME;
Expand All @@ -46,6 +57,7 @@
import static io.trino.testing.TestingNames.randomNameSuffix;
import static java.lang.String.format;
import static java.util.Locale.ENGLISH;
import static org.apache.iceberg.Files.localInput;
import static org.assertj.core.api.Assertions.assertThat;

public class TestIcebergRegisterTableProcedure
Expand Down Expand Up @@ -425,6 +437,62 @@ public void testRegisterTableWithInvalidMetadataFileName()
}
}

@Test
public void testRegisterHadoopTableAndRead()
{
// create a temporary table to generate data file
String tempTableName = "temp_table_" + randomNameSuffix();
assertUpdate("CREATE TABLE " + tempTableName + " (id INT, name VARCHAR) WITH (format = 'ORC')");
assertUpdate("INSERT INTO " + tempTableName + " values(1, 'INDIA')", 1);
String dataFilePath = (String) computeScalar("SELECT \"$path\" FROM " + tempTableName);

// create hadoop table
String hadoopTableName = "hadoop_table_" + randomNameSuffix();
String hadoopTableLocation = metastoreDir.getPath() + "/" + hadoopTableName;
HadoopTables hadoopTables = new HadoopTables(newEmptyConfiguration());
Schema schema = new Schema(ImmutableList.of(
Types.NestedField.optional(1, "id", Types.IntegerType.get()),
Types.NestedField.optional(2, "name", Types.StringType.get())));
Table table = hadoopTables.create(
schema,
PartitionSpec.unpartitioned(),
SortOrder.unsorted(),
ImmutableMap.of("write.format.default", "ORC"),
hadoopTableLocation);

// append data file to hadoop table
DataFile dataFile =
DataFiles.builder(PartitionSpec.unpartitioned())
.withFormat(FileFormat.ORC)
.withInputFile(localInput(new File(dataFilePath)))
.withPath(dataFilePath)
.withRecordCount(1)
.build();
table.newFastAppend()
.appendFile(dataFile)
.commit();

// Hadoop style version number
assertThat(Location.of(getLatestMetadataLocation(fileSystem, hadoopTableLocation)).fileName())
.isEqualTo("v2.metadata.json");

// Try registering hadoop table in Trino and read it
String registeredTableName = "registered_table_" + randomNameSuffix();
assertUpdate("CALL system.register_table(CURRENT_SCHEMA, '%s', '%s')".formatted(registeredTableName, hadoopTableLocation));
assertQuery("SELECT * FROM " + registeredTableName, "VALUES (1, 'INDIA')");

// Verify the table can be written to despite using non-standard metadata file name
assertUpdate("INSERT INTO " + registeredTableName + " VALUES (2, 'POLAND')", 1);
assertQuery("SELECT * FROM " + registeredTableName, "VALUES (1, 'INDIA'), (2, 'POLAND')");

// New metadata file is written using standard file name convention
assertThat(Location.of(getLatestMetadataLocation(fileSystem, hadoopTableLocation)).fileName())
.matches("00003-.*\\.metadata\\.json");

assertUpdate("DROP TABLE " + registeredTableName);
assertUpdate("DROP TABLE " + tempTableName);
}

private String getTableLocation(String tableName)
{
Pattern locationPattern = Pattern.compile(".*location = '(.*?)'.*", Pattern.DOTALL);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@ public void testParseVersion()
assertEquals(parseVersion("99999-409702ba-4735-4645-8f14-09537cc0b2c8.metadata.json"), 99999);
assertEquals(parseVersion("00010-409702ba-4735-4645-8f14-09537cc0b2c8.metadata.json"), 10);
assertEquals(parseVersion("00011-409702ba-4735-4645-8f14-09537cc0b2c8.metadata.json"), 11);
assertEquals(parseVersion("v0.metadata.json"), 0);
assertEquals(parseVersion("v10.metadata.json"), 10);
assertEquals(parseVersion("v99999.metadata.json"), 99999);
assertEquals(parseVersion("v0.gz.metadata.json"), 0);
assertEquals(parseVersion("v0.metadata.json.gz"), 0);

assertThatThrownBy(() -> parseVersion("hdfs://hadoop-master:9000/user/hive/warehouse/orders_5-581fad8517934af6be1857a903559d44/metadata/00000-409702ba-4735-4645-8f14-09537cc0b2c8.metadata.json"))
.hasMessageMatching("Not a file name: .*");
Expand All @@ -37,10 +42,18 @@ public void testParseVersion()
.hasMessageMatching("Invalid metadata file name:.*");
assertThatThrownBy(() -> parseVersion("00010_409702ba_4735_4645_8f14_09537cc0b2c8.metadata.json"))
.hasMessageMatching("Invalid metadata file name:.*");
assertThatThrownBy(() -> parseVersion("v10_metadata_json"))
.hasMessageMatching("Invalid metadata file name:.*");
assertThatThrownBy(() -> parseVersion("v1..gz.metadata.json"))
.hasMessageMatching("Invalid metadata file name:.*");
assertThatThrownBy(() -> parseVersion("v1.metadata.json.gz."))
.hasMessageMatching("Invalid metadata file name:.*");

assertThatThrownBy(() -> parseVersion("00003_409702ba-4735-4645-8f14-09537cc0b2c8.metadata.json"))
.hasMessageMatching("Invalid metadata file name:.*");
assertThatThrownBy(() -> parseVersion("-00010-409702ba-4735-4645-8f14-09537cc0b2c8.metadata.json"))
.hasMessageMatching("Invalid metadata file name:.*");
assertThatThrownBy(() -> parseVersion("v-10.metadata.json"))
.hasMessageMatching("Invalid metadata file name:.*");
}
}

0 comments on commit 8bb53d6

Please sign in to comment.