refactor(rust): Rename MetaData -> Metadata (#18644)
nameexhaustion committed Sep 10, 2024
1 parent 45c8e96 commit 8ebd739
Showing 24 changed files with 108 additions and 108 deletions.
4 changes: 2 additions & 2 deletions crates/polars-io/src/parquet/metadata.rs
@@ -2,7 +2,7 @@

use std::sync::Arc;

-pub use polars_parquet::parquet::metadata::FileMetaData;
+pub use polars_parquet::parquet::metadata::FileMetadata;
pub use polars_parquet::read::statistics::{deserialize, Statistics as ParquetStatistics};

-pub type FileMetaDataRef = Arc<FileMetaData>;
+pub type FileMetadataRef = Arc<FileMetadata>;
22 changes: 11 additions & 11 deletions crates/polars-io/src/parquet/read/async_impl.rs
@@ -6,8 +6,8 @@ use bytes::Bytes;
use object_store::path::Path as ObjectPath;
use polars_core::config::{get_rg_prefetch_size, verbose};
use polars_core::prelude::*;
-use polars_parquet::read::RowGroupMetaData;
-use polars_parquet::write::FileMetaData;
+use polars_parquet::read::RowGroupMetadata;
+use polars_parquet::write::FileMetadata;
use polars_utils::pl_str::PlSmallStr;
use tokio::sync::mpsc::{channel, Receiver, Sender};
use tokio::sync::Mutex;
@@ -17,7 +17,7 @@ use super::predicates::read_this_row_group;
use crate::cloud::{
build_object_store, object_path_from_str, CloudLocation, CloudOptions, PolarsObjectStore,
};
-use crate::parquet::metadata::FileMetaDataRef;
+use crate::parquet::metadata::FileMetadataRef;
use crate::pl_async::get_runtime;
use crate::predicates::PhysicalIoExpr;

@@ -29,14 +29,14 @@ pub struct ParquetObjectStore {
store: PolarsObjectStore,
path: ObjectPath,
length: Option<usize>,
-metadata: Option<FileMetaDataRef>,
+metadata: Option<FileMetadataRef>,
}

impl ParquetObjectStore {
pub async fn from_uri(
uri: &str,
options: Option<&CloudOptions>,
-metadata: Option<FileMetaDataRef>,
+metadata: Option<FileMetadataRef>,
) -> PolarsResult<Self> {
let (CloudLocation { prefix, .. }, store) = build_object_store(uri, options, false).await?;
let path = object_path_from_str(&prefix)?;
@@ -74,13 +74,13 @@ impl ParquetObjectStore {
}

/// Fetch the metadata of the parquet file, do not memoize it.
-async fn fetch_metadata(&mut self) -> PolarsResult<FileMetaData> {
+async fn fetch_metadata(&mut self) -> PolarsResult<FileMetadata> {
let length = self.length().await?;
fetch_metadata(&self.store, &self.path, length).await
}

/// Fetch and memoize the metadata of the parquet file.
-pub async fn get_metadata(&mut self) -> PolarsResult<&FileMetaDataRef> {
+pub async fn get_metadata(&mut self) -> PolarsResult<&FileMetadataRef> {
if self.metadata.is_none() {
self.metadata = Some(Arc::new(self.fetch_metadata().await?));
}
@@ -107,7 +107,7 @@ pub async fn fetch_metadata(
store: &PolarsObjectStore,
path: &ObjectPath,
file_byte_length: usize,
-) -> PolarsResult<FileMetaData> {
+) -> PolarsResult<FileMetadata> {
let footer_header_bytes = store
.get_range(
path,
@@ -165,7 +165,7 @@ pub async fn fetch_metadata(
/// We concurrently download the columns for each field.
async fn download_projection(
fields: Arc<[PlSmallStr]>,
-row_group: RowGroupMetaData,
+row_group: RowGroupMetadata,
async_reader: Arc<ParquetObjectStore>,
sender: QueueSend,
rg_index: usize,
@@ -205,7 +205,7 @@ async fn download_projection(
}

async fn download_row_group(
-rg: RowGroupMetaData,
+rg: RowGroupMetadata,
async_reader: Arc<ParquetObjectStore>,
sender: QueueSend,
rg_index: usize,
@@ -255,7 +255,7 @@ impl FetchRowGroupsFromObjectStore {
projection: Option<&[usize]>,
predicate: Option<Arc<dyn PhysicalIoExpr>>,
row_group_range: Range<usize>,
-row_groups: &[RowGroupMetaData],
+row_groups: &[RowGroupMetadata],
) -> PolarsResult<Self> {
let projected_fields: Option<Arc<[PlSmallStr]>> = projection.map(|projection| {
projection
6 changes: 3 additions & 3 deletions crates/polars-io/src/parquet/read/predicates.rs
@@ -1,6 +1,6 @@
use polars_core::prelude::*;
use polars_parquet::read::statistics::{deserialize, Statistics};
-use polars_parquet::read::RowGroupMetaData;
+use polars_parquet::read::RowGroupMetadata;

use crate::predicates::{BatchStats, ColumnStats, PhysicalIoExpr};

@@ -17,7 +17,7 @@ impl ColumnStats {

/// Collect the statistics in a row-group
pub(crate) fn collect_statistics(
-md: &RowGroupMetaData,
+md: &RowGroupMetadata,
schema: &ArrowSchema,
) -> PolarsResult<Option<BatchStats>> {
// TODO! fix this performance. This is a full sequential scan.
@@ -47,7 +47,7 @@ pub(crate) fn collect_statistics(

pub fn read_this_row_group(
predicate: Option<&dyn PhysicalIoExpr>,
-md: &RowGroupMetaData,
+md: &RowGroupMetadata,
schema: &ArrowSchema,
) -> PolarsResult<bool> {
if let Some(pred) = predicate {
20 changes: 10 additions & 10 deletions crates/polars-io/src/parquet/read/read_impl.rs
@@ -12,7 +12,7 @@ use polars_core::POOL;
use polars_parquet::parquet::error::ParquetResult;
use polars_parquet::parquet::statistics::Statistics;
use polars_parquet::read::{
-self, ColumnChunkMetadata, FileMetaData, Filter, PhysicalType, RowGroupMetaData,
+self, ColumnChunkMetadata, FileMetadata, Filter, PhysicalType, RowGroupMetadata,
};
use polars_utils::mmap::MemSlice;
use rayon::prelude::*;
@@ -26,7 +26,7 @@ use super::utils::materialize_empty_df;
use super::{mmap, ParallelStrategy};
use crate::hive::materialize_hive_partitions;
use crate::mmap::{MmapBytesReader, ReaderBytes};
-use crate::parquet::metadata::FileMetaDataRef;
+use crate::parquet::metadata::FileMetadataRef;
use crate::parquet::read::ROW_COUNT_OVERFLOW_ERR;
use crate::predicates::{apply_predicate, PhysicalIoExpr};
use crate::utils::get_reader_bytes;
@@ -142,7 +142,7 @@ fn rg_to_dfs(
row_group_start: usize,
row_group_end: usize,
slice: (usize, usize),
-file_metadata: &FileMetaData,
+file_metadata: &FileMetadata,
schema: &ArrowSchemaRef,
predicate: Option<&dyn PhysicalIoExpr>,
row_index: Option<RowIndex>,
@@ -227,7 +227,7 @@ fn rg_to_dfs_prefiltered(
previous_row_count: &mut IdxSize,
row_group_start: usize,
row_group_end: usize,
-file_metadata: &FileMetaData,
+file_metadata: &FileMetadata,
schema: &ArrowSchemaRef,
live_variables: Vec<PlSmallStr>,
predicate: &dyn PhysicalIoExpr,
@@ -501,7 +501,7 @@ fn rg_to_dfs_optionally_par_over_columns(
row_group_start: usize,
row_group_end: usize,
slice: (usize, usize),
-file_metadata: &FileMetaData,
+file_metadata: &FileMetadata,
schema: &ArrowSchemaRef,
predicate: Option<&dyn PhysicalIoExpr>,
row_index: Option<RowIndex>,
@@ -605,7 +605,7 @@ fn rg_to_dfs_par_over_rg(
row_group_end: usize,
previous_row_count: &mut IdxSize,
slice: (usize, usize),
-file_metadata: &FileMetaData,
+file_metadata: &FileMetadata,
schema: &ArrowSchemaRef,
predicate: Option<&dyn PhysicalIoExpr>,
row_index: Option<RowIndex>,
@@ -701,7 +701,7 @@ pub fn read_parquet<R: MmapBytesReader>(
slice: (usize, usize),
projection: Option<&[usize]>,
reader_schema: &ArrowSchemaRef,
-metadata: Option<FileMetaDataRef>,
+metadata: Option<FileMetadataRef>,
predicate: Option<&dyn PhysicalIoExpr>,
mut parallel: ParallelStrategy,
row_index: Option<RowIndex>,
@@ -855,7 +855,7 @@ pub(super) fn compute_row_group_range(
row_group_start: usize,
row_group_end: usize,
slice: (usize, usize),
-row_groups: &[RowGroupMetaData],
+row_groups: &[RowGroupMetadata],
) -> std::ops::Range<usize> {
let mut start = row_group_start;
let mut cum_rows: usize = (0..row_group_start).map(|i| row_groups[i].num_rows()).sum();
@@ -901,7 +901,7 @@ pub struct BatchedParquetReader {
slice: (usize, usize),
projection: Arc<[usize]>,
schema: ArrowSchemaRef,
-metadata: FileMetaDataRef,
+metadata: FileMetadataRef,
predicate: Option<Arc<dyn PhysicalIoExpr>>,
row_index: Option<RowIndex>,
rows_read: IdxSize,
@@ -921,7 +921,7 @@ impl BatchedParquetReader {
#[allow(clippy::too_many_arguments)]
pub fn new(
row_group_fetcher: RowGroupFetcher,
-metadata: FileMetaDataRef,
+metadata: FileMetadataRef,
schema: ArrowSchemaRef,
slice: (usize, usize),
projection: Option<Vec<usize>>,
10 changes: 5 additions & 5 deletions crates/polars-io/src/parquet/read/reader.rs
@@ -18,7 +18,7 @@ use super::utils::materialize_empty_df;
#[cfg(feature = "cloud")]
use crate::cloud::CloudOptions;
use crate::mmap::MmapBytesReader;
-use crate::parquet::metadata::FileMetaDataRef;
+use crate::parquet::metadata::FileMetadataRef;
use crate::predicates::PhysicalIoExpr;
use crate::prelude::*;
use crate::RowIndex;
@@ -35,7 +35,7 @@ pub struct ParquetReader<R: Read + Seek> {
schema: Option<ArrowSchemaRef>,
row_index: Option<RowIndex>,
low_memory: bool,
-metadata: Option<FileMetaDataRef>,
+metadata: Option<FileMetadataRef>,
predicate: Option<Arc<dyn PhysicalIoExpr>>,
hive_partition_columns: Option<Vec<Series>>,
include_file_path: Option<(PlSmallStr, Arc<str>)>,
@@ -138,7 +138,7 @@ impl<R: MmapBytesReader> ParquetReader<R> {
self
}

-pub fn get_metadata(&mut self) -> PolarsResult<&FileMetaDataRef> {
+pub fn get_metadata(&mut self) -> PolarsResult<&FileMetadataRef> {
if self.metadata.is_none() {
self.metadata = Some(Arc::new(read::read_metadata(&mut self.reader)?));
}
@@ -267,7 +267,7 @@ impl ParquetAsyncReader {
pub async fn from_uri(
uri: &str,
cloud_options: Option<&CloudOptions>,
-metadata: Option<FileMetaDataRef>,
+metadata: Option<FileMetadataRef>,
) -> PolarsResult<ParquetAsyncReader> {
Ok(ParquetAsyncReader {
reader: ParquetObjectStore::from_uri(uri, cloud_options, metadata).await?,
@@ -406,7 +406,7 @@ impl ParquetAsyncReader {
)
}

-pub async fn get_metadata(&mut self) -> PolarsResult<&FileMetaDataRef> {
+pub async fn get_metadata(&mut self) -> PolarsResult<&FileMetadataRef> {
self.reader.get_metadata().await
}

6 changes: 3 additions & 3 deletions crates/polars-mem-engine/src/executors/scan/parquet.rs
@@ -5,7 +5,7 @@ use polars_core::config::{get_file_prefetch_size, verbose};
use polars_core::utils::accumulate_dataframes_vertical;
use polars_error::feature_gated;
use polars_io::cloud::CloudOptions;
-use polars_io::parquet::metadata::FileMetaDataRef;
+use polars_io::parquet::metadata::FileMetadataRef;
use polars_io::utils::slice::split_slice_at_file;
use polars_io::RowIndex;

@@ -21,7 +21,7 @@ pub struct ParquetExec {
cloud_options: Option<CloudOptions>,
file_options: FileScanOptions,
#[allow(dead_code)]
-metadata: Option<FileMetaDataRef>,
+metadata: Option<FileMetadataRef>,
}

impl ParquetExec {
@@ -34,7 +34,7 @@ impl ParquetExec {
options: ParquetOptions,
cloud_options: Option<CloudOptions>,
file_options: FileScanOptions,
-metadata: Option<FileMetaDataRef>,
+metadata: Option<FileMetadataRef>,
) -> Self {
ParquetExec {
sources,
8 changes: 4 additions & 4 deletions crates/polars-parquet/src/arrow/read/mod.rs
@@ -15,7 +15,7 @@ pub use deserialize::{
#[cfg(feature = "async")]
use futures::{AsyncRead, AsyncSeek};
use polars_error::PolarsResult;
-pub use schema::{infer_schema, FileMetaData};
+pub use schema::{infer_schema, FileMetadata};

use crate::parquet::error::ParquetResult;
#[cfg(feature = "async")]
@@ -24,7 +24,7 @@ pub use crate::parquet::read::{get_page_stream, read_metadata_async as _read_met
pub use crate::parquet::{
error::ParquetError,
fallible_streaming_iterator,
-metadata::{ColumnChunkMetadata, ColumnDescriptor, RowGroupMetaData},
+metadata::{ColumnChunkMetadata, ColumnDescriptor, RowGroupMetadata},
page::{CompressedDataPage, DataPageHeader, Page},
read::{
decompress, get_column_iterator, read_metadata as _read_metadata, BasicDecompressor,
@@ -54,15 +54,15 @@ pub fn get_field_pages<'a, T>(
}

/// Reads parquets' metadata synchronously.
-pub fn read_metadata<R: Read + Seek>(reader: &mut R) -> PolarsResult<FileMetaData> {
+pub fn read_metadata<R: Read + Seek>(reader: &mut R) -> PolarsResult<FileMetadata> {
Ok(_read_metadata(reader)?)
}

/// Reads parquets' metadata asynchronously.
#[cfg(feature = "async")]
pub async fn read_metadata_async<R: AsyncRead + AsyncSeek + Send + Unpin>(
reader: &mut R,
-) -> PolarsResult<FileMetaData> {
+) -> PolarsResult<FileMetadata> {
Ok(_read_metadata_async(reader).await?)
}

8 changes: 4 additions & 4 deletions crates/polars-parquet/src/arrow/read/schema/mod.rs
@@ -10,7 +10,7 @@ pub use metadata::read_schema_from_metadata;
use polars_error::PolarsResult;

use self::metadata::parse_key_value_metadata;
-pub use crate::parquet::metadata::{FileMetaData, KeyValue, SchemaDescriptor};
+pub use crate::parquet::metadata::{FileMetadata, KeyValue, SchemaDescriptor};
pub use crate::parquet::schema::types::ParquetType;

/// Options when inferring schemas from Parquet
@@ -33,21 +33,21 @@ impl Default for SchemaInferenceOptions {
}
}

-/// Infers a [`ArrowSchema`] from parquet's [`FileMetaData`].
+/// Infers a [`ArrowSchema`] from parquet's [`FileMetadata`].
///
/// This first looks for the metadata key `"ARROW:schema"`; if it does not exist, it converts the
/// Parquet types declared in the file's Parquet schema to Arrow's equivalent.
///
/// # Error
/// This function errors iff the key `"ARROW:schema"` exists but is not correctly encoded,
indicating that the file's arrow metadata was incorrectly written.
-pub fn infer_schema(file_metadata: &FileMetaData) -> PolarsResult<ArrowSchema> {
+pub fn infer_schema(file_metadata: &FileMetadata) -> PolarsResult<ArrowSchema> {
infer_schema_with_options(file_metadata, &None)
}

/// Like [`infer_schema`] but with configurable options which affects the behavior of inference
pub fn infer_schema_with_options(
-file_metadata: &FileMetaData,
+file_metadata: &FileMetadata,
options: &Option<SchemaInferenceOptions>,
) -> PolarsResult<ArrowSchema> {
let mut metadata = parse_key_value_metadata(file_metadata.key_value_metadata());
6 changes: 3 additions & 3 deletions crates/polars-parquet/src/arrow/write/file.rs
@@ -4,7 +4,7 @@ use arrow::datatypes::ArrowSchema;
use polars_error::{PolarsError, PolarsResult};

use super::schema::schema_to_metadata_key;
-use super::{to_parquet_schema, ThriftFileMetaData, WriteOptions};
+use super::{to_parquet_schema, ThriftFileMetadata, WriteOptions};
use crate::parquet::metadata::{KeyValue, SchemaDescriptor};
use crate::parquet::write::{RowGroupIterColumns, WriteOptions as FileWriteOptions};

@@ -86,10 +86,10 @@ impl<W: Write> FileWriter<W> {
self.writer.into_inner()
}

-/// Returns the underlying writer and [`ThriftFileMetaData`]
+/// Returns the underlying writer and [`ThriftFileMetadata`]
/// # Panics
/// This function panics if [`Self::end`] has not yet been called
-pub fn into_inner_and_metadata(self) -> (W, ThriftFileMetaData) {
+pub fn into_inner_and_metadata(self) -> (W, ThriftFileMetadata) {
self.writer.into_inner_and_metadata()
}
}
2 changes: 1 addition & 1 deletion crates/polars-parquet/src/arrow/write/mod.rs
@@ -38,7 +38,7 @@ pub use utils::write_def_levels;
pub use crate::parquet::compression::{BrotliLevel, CompressionOptions, GzipLevel, ZstdLevel};
pub use crate::parquet::encoding::Encoding;
pub use crate::parquet::metadata::{
-Descriptor, FileMetaData, KeyValue, SchemaDescriptor, ThriftFileMetaData,
+Descriptor, FileMetadata, KeyValue, SchemaDescriptor, ThriftFileMetadata,
};
pub use crate::parquet::page::{CompressedDataPage, CompressedPage, Page};
use crate::parquet::schema::types::PrimitiveType as ParquetPrimitiveType;