Skip to content

Commit

Permalink
fix TestFragmentScanOptions
Browse files Browse the repository at this point in the history
  • Loading branch information
jinchengchenghh committed Aug 12, 2024
1 parent ee86db5 commit ea541c6
Showing 1 changed file with 44 additions and 22 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,16 @@

public class TestFragmentScanOptions {

private CsvFragmentScanOptions create(
ArrowSchema cSchema,
Map<String, String> convertOptionsMap,
Map<String, String> readOptions,
Map<String, String> parseOptions) {
CsvConvertOptions convertOptions = new CsvConvertOptions(convertOptionsMap);
convertOptions.setArrowSchema(cSchema);
return new CsvFragmentScanOptions(convertOptions, readOptions, parseOptions);
}

@Test
public void testCsvConvertOptions() throws Exception {
final Schema schema =
Expand All @@ -63,24 +73,29 @@ public void testCsvConvertOptions() throws Exception {
String path = "file://" + getClass().getResource("/").getPath() + "/data/student.csv";
BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
try (ArrowSchema cSchema = ArrowSchema.allocateNew(allocator);
ArrowSchema cSchema2 = ArrowSchema.allocateNew(allocator);
CDataDictionaryProvider provider = new CDataDictionaryProvider()) {
Data.exportSchema(allocator, schema, provider, cSchema);
CsvConvertOptions convertOptions = new CsvConvertOptions(ImmutableMap.of("delimiter", ";"));
convertOptions.setArrowSchema(cSchema);
CsvFragmentScanOptions fragmentScanOptions =
new CsvFragmentScanOptions(convertOptions, ImmutableMap.of(), ImmutableMap.of());
Data.exportSchema(allocator, schema, provider, cSchema2);
CsvFragmentScanOptions fragmentScanOptions1 =
create(cSchema, ImmutableMap.of(), ImmutableMap.of(), ImmutableMap.of("delimiter", ";"));
CsvFragmentScanOptions fragmentScanOptions2 =
create(cSchema2, ImmutableMap.of(), ImmutableMap.of(), ImmutableMap.of("delimiter", ";"));
ScanOptions options =
new ScanOptions.Builder(/*batchSize*/ 32768)
.columns(Optional.empty())
.fragmentScanOptions(fragmentScanOptions)
.fragmentScanOptions(fragmentScanOptions1)
.build();
try (DatasetFactory datasetFactory =
new FileSystemDatasetFactory(
allocator, NativeMemoryPool.getDefault(), FileFormat.CSV, path);
allocator,
NativeMemoryPool.getDefault(),
FileFormat.CSV,
path,
Optional.of(fragmentScanOptions2));
Dataset dataset = datasetFactory.finish();
Scanner scanner = dataset.newScan(options);
ArrowReader reader = scanner.scanBatches()) {

assertEquals(schema.getFields(), reader.getVectorSchemaRoot().getSchema().getFields());
int rowCount = 0;
while (reader.loadNextBatch()) {
Expand All @@ -106,30 +121,38 @@ public void testCsvConvertOptionsDelimiterNotSet() throws Exception {
String path = "file://" + getClass().getResource("/").getPath() + "/data/student.csv";
BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
try (ArrowSchema cSchema = ArrowSchema.allocateNew(allocator);
ArrowSchema cSchema2 = ArrowSchema.allocateNew(allocator);
CDataDictionaryProvider provider = new CDataDictionaryProvider()) {
Data.exportSchema(allocator, schema, provider, cSchema);
CsvConvertOptions convertOptions = new CsvConvertOptions(ImmutableMap.of());
convertOptions.setArrowSchema(cSchema);
CsvFragmentScanOptions fragmentScanOptions =
new CsvFragmentScanOptions(convertOptions, ImmutableMap.of(), ImmutableMap.of());
Data.exportSchema(allocator, schema, provider, cSchema2);
CsvFragmentScanOptions fragmentScanOptions1 =
create(cSchema, ImmutableMap.of(), ImmutableMap.of(), ImmutableMap.of());
CsvFragmentScanOptions fragmentScanOptions2 =
create(cSchema2, ImmutableMap.of(), ImmutableMap.of(), ImmutableMap.of());
ScanOptions options =
new ScanOptions.Builder(/*batchSize*/ 32768)
.columns(Optional.empty())
.fragmentScanOptions(fragmentScanOptions)
.fragmentScanOptions(fragmentScanOptions1)
.build();
try (DatasetFactory datasetFactory =
new FileSystemDatasetFactory(
allocator, NativeMemoryPool.getDefault(), FileFormat.CSV, path);
allocator,
NativeMemoryPool.getDefault(),
FileFormat.CSV,
path,
Optional.of(fragmentScanOptions2));
Dataset dataset = datasetFactory.finish();
Scanner scanner = dataset.newScan(options);
ArrowReader reader = scanner.scanBatches()) {

assertEquals(schema.getFields(), reader.getVectorSchemaRoot().getSchema().getFields());
int rowCount = 0;
while (reader.loadNextBatch()) {
final ValueIterableVector<Integer> idVector =
(ValueIterableVector<Integer>) reader.getVectorSchemaRoot().getVector("Id");
assertThat(idVector.getValueIterable(), IsIterableContainingInOrder.contains(1, 2, 3));
final ValueIterableVector<Text> idVector =
(ValueIterableVector<Text>)
reader.getVectorSchemaRoot().getVector("Id;Name;Language");
assertThat(
idVector.getValueIterable(),
IsIterableContainingInOrder.contains(
new Text("1;Juno;Java"), new Text("2;Peter;Python"), new Text("3;Celin;C++")));
rowCount += reader.getVectorSchemaRoot().getRowCount();
}
assertEquals(3, rowCount);
Expand Down Expand Up @@ -157,13 +180,12 @@ public void testCsvConvertOptionsNoOption() throws Exception {
assertEquals(schema.getFields(), reader.getVectorSchemaRoot().getSchema().getFields());
int rowCount = 0;
while (reader.loadNextBatch()) {
final ValueIterableVector<String> idVector =
(ValueIterableVector<String>)
reader.getVectorSchemaRoot().getVector("Id;Name;Language");
final ValueIterableVector<Text> idVector =
(ValueIterableVector<Text>) reader.getVectorSchemaRoot().getVector("Id;Name;Language");
assertThat(
idVector.getValueIterable(),
IsIterableContainingInOrder.contains(
"1;Juno;Java\n" + "2;Peter;Python\n" + "3;Celin;C++"));
new Text("1;Juno;Java"), new Text("2;Peter;Python"), new Text("3;Celin;C++")));
rowCount += reader.getVectorSchemaRoot().getRowCount();
}
assertEquals(3, rowCount);
Expand Down

0 comments on commit ea541c6

Please sign in to comment.