Files
file	avro.hpp

file	csv.hpp

file	io/json.hpp

file	orc.hpp

file	parquet.hpp

Classes
class	cudf::io::avro_reader_options
	Settings to use for `read_avro()`. More...

class	cudf::io::avro_reader_options_builder
	Builder to build options for `read_avro()`. More...

class	cudf::io::csv_reader_options
	Settings to use for `read_csv()`. More...

class	cudf::io::csv_reader_options_builder
	Builder to build options for `read_csv()`. More...

struct	cudf::io::schema_element
	Allows specifying the target types for nested JSON data via json_reader_options' `set_dtypes` method. More...

class	cudf::io::json_reader_options
	Input arguments to the `read_json` interface. More...

class	cudf::io::json_reader_options_builder
	Builds settings to use for `read_json()`. More...

class	cudf::io::orc_reader_options
	Settings to use for `read_orc()`. More...

class	cudf::io::orc_reader_options_builder
	Builds settings to use for `read_orc()`. More...

class	cudf::io::parquet_reader_options
	Settings for `read_parquet()`. More...

class	cudf::io::parquet_reader_options_builder
	Builds parquet_reader_options to use for `read_parquet()`. More...

class	cudf::io::chunked_parquet_reader
	The chunked parquet reader class to read a Parquet file iteratively into a series of tables, chunk by chunk. More...

Enumerations
enum class	cudf::io::json_recovery_mode_t { cudf::io::FAIL , cudf::io::RECOVER_WITH_NULL }
	Control the error recovery behavior of the json parser. More...

Functions
table_with_metadata	cudf::io::read_avro (avro_reader_options const &options, rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
	Reads an Avro dataset into a set of columns. More...

table_with_metadata	cudf::io::read_csv (csv_reader_options options, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
	Reads a CSV dataset into a set of columns. More...

table_with_metadata	cudf::io::read_json (json_reader_options options, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
	Reads a JSON dataset into a set of columns. More...

table_with_metadata	cudf::io::read_orc (orc_reader_options const &options, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
	Reads an ORC dataset into a set of columns. More...

raw_orc_statistics	cudf::io::read_raw_orc_statistics (source_info const &src_info, rmm::cuda_stream_view stream=cudf::get_default_stream())
	Reads file-level and stripe-level statistics of ORC dataset. More...

parsed_orc_statistics	cudf::io::read_parsed_orc_statistics (source_info const &src_info, rmm::cuda_stream_view stream=cudf::get_default_stream())
	Reads file-level and stripe-level statistics of ORC dataset. More...

orc_metadata	cudf::io::read_orc_metadata (source_info const &src_info, rmm::cuda_stream_view stream=cudf::get_default_stream())
	Reads metadata of ORC dataset. More...

table_with_metadata	cudf::io::read_parquet (parquet_reader_options const &options, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
	Reads a Parquet dataset into a set of columns. More...

parquet_metadata	cudf::io::read_parquet_metadata (source_info const &src_info)
	Reads metadata of parquet dataset. More...

Variables
constexpr size_t	cudf::io::default_stripe_size_bytes = 64 * 1024 * 1024
	64MB default orc stripe size

constexpr size_type	cudf::io::default_stripe_size_rows = 1000000
	1M rows default orc stripe rows

constexpr size_type	cudf::io::default_row_index_stride = 10000
	10K rows default orc row index stride

constexpr size_t	cudf::io::default_row_group_size_bytes = 128 * 1024 * 1024
	128MB per row group

constexpr size_type	cudf::io::default_row_group_size_rows = 1000000
	1 million rows per row group

constexpr size_t	cudf::io::default_max_page_size_bytes = 512 * 1024
	512KB per page

constexpr size_type	cudf::io::default_max_page_size_rows = 20000
	20k rows per page

constexpr int32_t	cudf::io::default_column_index_truncate_length = 64
	truncate to 64 bytes

constexpr size_t	cudf::io::default_max_dictionary_size = 1024 * 1024
	1MB dictionary size

constexpr size_type	cudf::io::default_max_page_fragment_size = 5000
	5000 rows per page fragment

Detailed Description

Enumeration Type Documentation

◆ json_recovery_mode_t

enum cudf::io::json_recovery_mode_t

strong

Control the error recovery behavior of the json parser.

Enumerator
FAIL	Does not recover from an error when encountering an invalid format.
RECOVER_WITH_NULL	Recovers from an error, replacing invalid records with null.

Definition at line 60 of file io/json.hpp.

Function Documentation

◆ read_avro()

table_with_metadata cudf::io::read_avro	(	avro_reader_options const &	options,
		rmm::mr::device_memory_resource *	mr = `rmm::mr::get_current_device_resource()`
	)

Reads an Avro dataset into a set of columns.

The following code snippet demonstrates how to read a dataset from a file:

auto source  = cudf::io::source_info("dataset.avro");
auto options = cudf::io::avro_reader_options::builder(source);
auto result  = cudf::io::read_avro(options);

Parameters

options	Settings for controlling reading behavior
mr	Device memory resource used to allocate device memory of the table in the returned table_with_metadata

Returns: The set of columns along with metadata

◆ read_csv()

table_with_metadata cudf::io::read_csv	(	csv_reader_options	options,
		rmm::cuda_stream_view	stream = `cudf::get_default_stream()`,
		rmm::mr::device_memory_resource *	mr = `rmm::mr::get_current_device_resource()`
	)

Reads a CSV dataset into a set of columns.

The following code snippet demonstrates how to read a dataset from a file:

auto source  = cudf::io::source_info("dataset.csv");
auto options = cudf::io::csv_reader_options::builder(source);
auto result  = cudf::io::read_csv(options);

Parameters

options	Settings for controlling reading behavior
stream	CUDA stream used for device memory operations and kernel launches
mr	Device memory resource used to allocate device memory of the table in the returned table_with_metadata

Returns: The set of columns along with metadata

◆ read_json()

table_with_metadata cudf::io::read_json	(	json_reader_options	options,
		rmm::cuda_stream_view	stream = `cudf::get_default_stream()`,
		rmm::mr::device_memory_resource *	mr = `rmm::mr::get_current_device_resource()`
	)

Reads a JSON dataset into a set of columns.

The following code snippet demonstrates how to read a dataset from a file:

auto source  = cudf::io::source_info("dataset.json");
auto options = cudf::io::json_reader_options::builder(source);
auto result  = cudf::io::read_json(options);

Parameters

options	Settings for controlling reading behavior
stream	CUDA stream used for device memory operations and kernel launches
mr	Device memory resource used to allocate device memory of the table in the returned table_with_metadata.

Returns: The set of columns along with metadata

◆ read_orc()

table_with_metadata cudf::io::read_orc	(	orc_reader_options const &	options,
		rmm::cuda_stream_view	stream = `cudf::get_default_stream()`,
		rmm::mr::device_memory_resource *	mr = `rmm::mr::get_current_device_resource()`
	)

Reads an ORC dataset into a set of columns.

The following code snippet demonstrates how to read a dataset from a file:

auto source  = cudf::io::source_info("dataset.orc");
auto options = cudf::io::orc_reader_options::builder(source);
auto result  = cudf::io::read_orc(options);

Parameters

options	Settings for controlling reading behavior
stream	CUDA stream used for device memory operations and kernel launches
mr	Device memory resource used to allocate device memory of the table in the returned table_with_metadata.

Returns: The set of columns along with metadata

◆ read_orc_metadata()

orc_metadata cudf::io::read_orc_metadata	(	source_info const &	src_info,
		rmm::cuda_stream_view	stream = `cudf::get_default_stream()`
	)

Reads metadata of ORC dataset.

Parameters

src_info	Dataset source
stream	CUDA stream used for device memory operations and kernel launches

Returns: orc_metadata with ORC schema, number of rows and number of stripes.

◆ read_parquet()

table_with_metadata cudf::io::read_parquet	(	parquet_reader_options const &	options,
		rmm::cuda_stream_view	stream = `cudf::get_default_stream()`,
		rmm::mr::device_memory_resource *	mr = `rmm::mr::get_current_device_resource()`
	)

Reads a Parquet dataset into a set of columns.

The following code snippet demonstrates how to read a dataset from a file:

auto source  = cudf::io::source_info("dataset.parquet");
auto options = cudf::io::parquet_reader_options::builder(source);
auto result  = cudf::io::read_parquet(options);

Parameters

options	Settings for controlling reading behavior
stream	CUDA stream used for device memory operations and kernel launches
mr	Device memory resource used to allocate device memory of the table in the returned table_with_metadata

Returns: The set of columns along with metadata

◆ read_parquet_metadata()

parquet_metadata cudf::io::read_parquet_metadata ( source_info const & src_info )

Reads metadata of parquet dataset.

Parameters

src_info Dataset source

Returns: parquet_metadata with parquet schema, number of rows, number of row groups and key-value metadata.

◆ read_parsed_orc_statistics()

parsed_orc_statistics cudf::io::read_parsed_orc_statistics	(	source_info const &	src_info,
		rmm::cuda_stream_view	stream = `cudf::get_default_stream()`
	)

Reads file-level and stripe-level statistics of ORC dataset.

Parameters

src_info	Dataset source
stream	CUDA stream used for device memory operations and kernel launches

Returns: Column names and decoded ORC statistics

◆ read_raw_orc_statistics()

raw_orc_statistics cudf::io::read_raw_orc_statistics	(	source_info const &	src_info,
		rmm::cuda_stream_view	stream = `cudf::get_default_stream()`
	)

Reads file-level and stripe-level statistics of ORC dataset.

The following code snippet demonstrates how to read statistics of a dataset from a file:

auto result = cudf::io::read_raw_orc_statistics(cudf::io::source_info("dataset.orc"));

cudf::io::read_raw_orc_statistics

raw_orc_statistics read_raw_orc_statistics(source_info const &src_info, rmm::cuda_stream_view stream=cudf::get_default_stream())

Reads file-level and stripe-level statistics of ORC dataset.

Parameters

src_info	Dataset source
stream	CUDA stream used for device memory operations and kernel launches

Returns: Column names and encoded ORC statistics

Files

Classes

Enumerations

Functions

Variables

Detailed Description

Enumeration Type Documentation

◆ json_recovery_mode_t

Function Documentation

◆ read_avro()

◆ read_csv()

◆ read_json()

◆ read_orc()

◆ read_orc_metadata()

◆ read_parquet()

◆ read_parquet_metadata()

◆ read_parsed_orc_statistics()

◆ read_raw_orc_statistics()