#include <WordSegmenter.d.hpp>

Public Member Functions
std::unique_ptr< icu4x::WordBreakIteratorUtf8 >	segment (std::string_view input) const

std::unique_ptr< icu4x::WordBreakIteratorUtf16 >	segment16 (std::u16string_view input) const

std::unique_ptr< icu4x::WordBreakIteratorLatin1 >	segment_latin1 (diplomat::span< const uint8_t > input) const

Static Public Member Functions
static std::unique_ptr< icu4x::WordSegmenter >	create_auto ()

static diplomat::result< std::unique_ptr< icu4x::WordSegmenter >, icu4x::DataError >	create_auto_with_content_locale (const icu4x::Locale &locale)

static diplomat::result< std::unique_ptr< icu4x::WordSegmenter >, icu4x::DataError >	create_auto_with_content_locale_and_provider (const icu4x::DataProvider &provider, const icu4x::Locale &locale)

static std::unique_ptr< icu4x::WordSegmenter >	create_lstm ()

static diplomat::result< std::unique_ptr< icu4x::WordSegmenter >, icu4x::DataError >	create_lstm_with_content_locale (const icu4x::Locale &locale)

static diplomat::result< std::unique_ptr< icu4x::WordSegmenter >, icu4x::DataError >	create_lstm_with_content_locale_and_provider (const icu4x::DataProvider &provider, const icu4x::Locale &locale)

static std::unique_ptr< icu4x::WordSegmenter >	create_dictionary ()

static diplomat::result< std::unique_ptr< icu4x::WordSegmenter >, icu4x::DataError >	create_dictionary_with_content_locale (const icu4x::Locale &locale)

static diplomat::result< std::unique_ptr< icu4x::WordSegmenter >, icu4x::DataError >	create_dictionary_with_content_locale_and_provider (const icu4x::DataProvider &provider, const icu4x::Locale &locale)

static void	operator delete (void *ptr)

Detailed Description

An ICU4X word-break segmenter, capable of finding word breakpoints in strings.

See the Rust documentation for WordSegmenter for more information.

Member Function Documentation

◆ create_auto()

std::unique_ptr< icu4x::WordSegmenter > icu4x::WordSegmenter::create_auto ( )

inlinestatic

Construct an WordSegmenter with automatically selecting the best available LSTM or dictionary payload data, using compiled data. This does not assume any content locale.

Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese, Khmer, Lao, and Thai.

See the Rust documentation for new_auto for more information.

◆ create_auto_with_content_locale()

diplomat::result< std::unique_ptr< icu4x::WordSegmenter >, icu4x::DataError > icu4x::WordSegmenter::create_auto_with_content_locale ( const icu4x::Locale & locale )

inlinestatic

Construct an WordSegmenter with automatically selecting the best available LSTM or dictionary payload data, using compiled data.

Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese, Khmer, Lao, and Thai.

See the Rust documentation for try_new_auto for more information.

◆ create_auto_with_content_locale_and_provider()

diplomat::result< std::unique_ptr< icu4x::WordSegmenter >, icu4x::DataError > icu4x::WordSegmenter::create_auto_with_content_locale_and_provider	(	const icu4x::DataProvider &	provider,
		const icu4x::Locale &	locale )

inlinestatic

Construct an WordSegmenter with automatically selecting the best available LSTM or dictionary payload data, using a particular data source.

Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese, Khmer, Lao, and Thai.

See the Rust documentation for try_new_auto for more information.

◆ create_dictionary()

std::unique_ptr< icu4x::WordSegmenter > icu4x::WordSegmenter::create_dictionary ( )

inlinestatic

Construct an WordSegmenter with with dictionary payload data for Chinese, Japanese, Burmese, Khmer, Lao, and Thai, using compiled data. This does not assume any content locale.

Note: currently, it uses dictionary for Chinese and Japanese, and dictionary for Burmese, Khmer, Lao, and Thai.

See the Rust documentation for new_dictionary for more information.

◆ create_dictionary_with_content_locale()

diplomat::result< std::unique_ptr< icu4x::WordSegmenter >, icu4x::DataError > icu4x::WordSegmenter::create_dictionary_with_content_locale ( const icu4x::Locale & locale )

inlinestatic

Construct an WordSegmenter with dictionary payload data for Chinese, Japanese, Burmese, Khmer, Lao, and Thai, using compiled data.

Note: currently, it uses dictionary for Chinese and Japanese, and dictionary for Burmese, Khmer, Lao, and Thai.

See the Rust documentation for try_new_dictionary for more information.

◆ create_dictionary_with_content_locale_and_provider()

diplomat::result< std::unique_ptr< icu4x::WordSegmenter >, icu4x::DataError > icu4x::WordSegmenter::create_dictionary_with_content_locale_and_provider	(	const icu4x::DataProvider &	provider,
		const icu4x::Locale &	locale )

inlinestatic

Construct an WordSegmenter with dictionary payload data for Chinese, Japanese, Burmese, Khmer, Lao, and Thai, using a particular data source.

Note: currently, it uses dictionary for Chinese and Japanese, and dictionary for Burmese, Khmer, Lao, and Thai.

See the Rust documentation for try_new_dictionary for more information.

◆ create_lstm()

std::unique_ptr< icu4x::WordSegmenter > icu4x::WordSegmenter::create_lstm ( )

inlinestatic

Construct an WordSegmenter with LSTM payload data for Burmese, Khmer, Lao, and Thai, using compiled data. This does not assume any content locale.

Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese, Khmer, Lao, and Thai.

See the Rust documentation for new_lstm for more information.

◆ create_lstm_with_content_locale()

diplomat::result< std::unique_ptr< icu4x::WordSegmenter >, icu4x::DataError > icu4x::WordSegmenter::create_lstm_with_content_locale ( const icu4x::Locale & locale )

inlinestatic

Construct an WordSegmenter with LSTM payload data for Burmese, Khmer, Lao, and Thai, using compiled data.

Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese, Khmer, Lao, and Thai.

See the Rust documentation for try_new_lstm for more information.

◆ create_lstm_with_content_locale_and_provider()

diplomat::result< std::unique_ptr< icu4x::WordSegmenter >, icu4x::DataError > icu4x::WordSegmenter::create_lstm_with_content_locale_and_provider	(	const icu4x::DataProvider &	provider,
		const icu4x::Locale &	locale )

inlinestatic

Construct an WordSegmenter with LSTM payload data for Burmese, Khmer, Lao, and Thai, using a particular data source.

Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese, Khmer, Lao, and Thai.

See the Rust documentation for try_new_lstm for more information.

◆ operator delete()

void icu4x::WordSegmenter::operator delete ( void * ptr )

inlinestatic

◆ segment()

std::unique_ptr< icu4x::WordBreakIteratorUtf8 > icu4x::WordSegmenter::segment ( std::string_view input ) const

inline

Segments a string.

Ill-formed input is treated as if errors had been replaced with REPLACEMENT CHARACTERs according to the WHATWG Encoding Standard.

See the Rust documentation for segment_utf8 for more information.

◆ segment16()

std::unique_ptr< icu4x::WordBreakIteratorUtf16 > icu4x::WordSegmenter::segment16 ( std::u16string_view input ) const

inline

Segments a string.

Ill-formed input is treated as if errors had been replaced with REPLACEMENT CHARACTERs according to the WHATWG Encoding Standard.

See the Rust documentation for segment_utf16 for more information.

◆ segment_latin1()

std::unique_ptr< icu4x::WordBreakIteratorLatin1 > icu4x::WordSegmenter::segment_latin1 ( diplomat::span< const uint8_t > input ) const

inline

Segments a Latin-1 string.

See the Rust documentation for segment_latin1 for more information.

The documentation for this class was generated from the following files:

ffi/capi/bindings/cpp/icu4x/WordSegmenter.d.hpp
ffi/capi/bindings/cpp/icu4x/WordSegmenter.hpp

Public Member Functions

Static Public Member Functions

Detailed Description

Member Function Documentation

◆ create_auto()

◆ create_auto_with_content_locale()

◆ create_auto_with_content_locale_and_provider()

◆ create_dictionary()

◆ create_dictionary_with_content_locale()

◆ create_dictionary_with_content_locale_and_provider()

◆ create_lstm()

◆ create_lstm_with_content_locale()

◆ create_lstm_with_content_locale_and_provider()

◆ operator delete()

◆ segment()

◆ segment16()

◆ segment_latin1()