std::unique_ptr< cudf::column > byte_pair_encoding(cudf::strings_column_view const &input, bpe_merge_pairs const &merges_pairs, cudf::string_scalar const &separator=cudf::string_scalar(" "), rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Byte pair encode the input strings.
std::unique_ptr< bpe_merge_pairs > load_merge_pairs_file(std::string const &filename_merges, rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Create an nvtext::bpe_merge_pairs from an input file.
std::unique_ptr< bpe_merge_pairs > load_merge_pairs(cudf::strings_column_view const &merge_pairs, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Create an nvtext::bpe_merge_pairs from a strings column.
The table of merge pairs for the BPE encoder.
bpe_merge_pairs(std::unique_ptr< cudf::column > &&input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Construct a new bpe merge pairs object from a column passed as an rvalue std::unique_ptr.
bpe_merge_pairs(cudf::strings_column_view const &input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Construct a new bpe merge pairs object from a strings column view.
bpe_merge_pairs_impl * impl
Implementation of the BPE merge pairs table.