libcudf  24.02.00
Classes | Namespaces | Functions | Variables
parquet.hpp File Reference
#include <cudf/ast/expressions.hpp>
#include <cudf/io/detail/parquet.hpp>
#include <cudf/io/types.hpp>
#include <cudf/table/table_view.hpp>
#include <cudf/types.hpp>
#include <rmm/mr/device/per_device_resource.hpp>
#include <iostream>
#include <memory>
#include <optional>
#include <string>
#include <vector>

Go to the source code of this file.

Classes

class  cudf::io::parquet_reader_options
 Settings for read_parquet(). More...
 
class  cudf::io::parquet_reader_options_builder
 Builds parquet_reader_options to use for read_parquet(). More...
 
class  cudf::io::chunked_parquet_reader
 The chunked parquet reader class to read a Parquet file iteratively into a series of tables, chunk by chunk. More...
 
class  cudf::io::parquet_writer_options
 Settings for write_parquet(). More...
 
class  cudf::io::parquet_writer_options_builder
 Class to build parquet_writer_options. More...
 
class  cudf::io::chunked_parquet_writer_options
 Settings for write_parquet_chunked(). More...
 
class  cudf::io::chunked_parquet_writer_options_builder
 Builds options for chunked_parquet_writer_options. More...
 
class  cudf::io::parquet_chunked_writer
 Chunked parquet writer class to handle options and write tables in chunks. More...
 

Namespaces

 cudf
 cuDF interfaces.
 
 cudf::io
 IO interfaces.
 

Functions

table_with_metadata cudf::io::read_parquet (parquet_reader_options const &options, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
 Reads a Parquet dataset into a set of columns. More...
 
std::unique_ptr< std::vector< uint8_t > > cudf::io::write_parquet (parquet_writer_options const &options, rmm::cuda_stream_view stream=cudf::get_default_stream())
 Writes a set of columns to parquet format. More...
 
std::unique_ptr< std::vector< uint8_t > > cudf::io::merge_row_group_metadata (std::vector< std::unique_ptr< std::vector< uint8_t >>> const &metadata_list)
 Merges multiple raw metadata blobs that were previously created by write_parquet into a single metadata blob. More...
 

Variables

constexpr size_t cudf::io::default_row_group_size_bytes = 128 * 1024 * 1024
 128MB per row group
 
constexpr size_type cudf::io::default_row_group_size_rows = 1000000
 1 million rows per row group
 
constexpr size_t cudf::io::default_max_page_size_bytes = 512 * 1024
 512KB per page
 
constexpr size_type cudf::io::default_max_page_size_rows = 20000
 20k rows per page
 
constexpr int32_t cudf::io::default_column_index_truncate_length = 64
 Truncate to 64 bytes
 
constexpr size_t cudf::io::default_max_dictionary_size = 1024 * 1024
 1MB dictionary size
 
constexpr size_type cudf::io::default_max_page_fragment_size = 5000
 5000 rows per page fragment