Struct icu::segmenter::LineSegmenter
pub struct LineSegmenter { /* private fields */ }
Expand description
Supports loading line break data, and creating line break iterators for different string encodings.
Examples
Segment a string with default options:
use icu_segmenter::LineSegmenter;
let segmenter =
LineSegmenter::try_new_unstable(&icu_testdata::unstable())
.expect("Data exists");
let breakpoints: Vec<usize> =
segmenter.segment_str("Hello World").collect();
assert_eq!(&breakpoints, &[6, 11]);
Segment a string with CSS option overrides:
use icu_segmenter::{
LineBreakOptions, LineBreakRule, LineSegmenter, WordBreakRule,
};
let mut options = LineBreakOptions::default();
options.line_break_rule = LineBreakRule::Strict;
options.word_break_rule = WordBreakRule::BreakAll;
options.ja_zh = false;
let segmenter = LineSegmenter::try_new_with_options_unstable(
&icu_testdata::unstable(),
options,
)
.expect("Data exists");
let breakpoints: Vec<usize> =
segmenter.segment_str("Hello World").collect();
assert_eq!(&breakpoints, &[1, 2, 3, 4, 6, 7, 8, 9, 10, 11]);
Segment a Latin1 byte string:
use icu_segmenter::LineSegmenter;
let segmenter =
LineSegmenter::try_new_unstable(&icu_testdata::unstable())
.expect("Data exists");
let breakpoints: Vec<usize> =
segmenter.segment_latin1(b"Hello World").collect();
assert_eq!(&breakpoints, &[6, 11]);
Implementations
sourceimpl LineSegmenter
impl LineSegmenter
sourcepub fn try_new_unstable<D>(
provider: &D
) -> Result<LineSegmenter, SegmenterError> where
D: DataProvider<LineBreakDataV1Marker> + DataProvider<LstmDataV1Marker> + DataProvider<GraphemeClusterBreakDataV1Marker> + ?Sized,
pub fn try_new_unstable<D>(
provider: &D
) -> Result<LineSegmenter, SegmenterError> where
D: DataProvider<LineBreakDataV1Marker> + DataProvider<LstmDataV1Marker> + DataProvider<GraphemeClusterBreakDataV1Marker> + ?Sized,
Constructs a LineSegmenter with the default LineBreakOptions.
sourcepub fn try_new_with_any_provider(
provider: &impl AnyProvider
) -> Result<LineSegmenter, SegmenterError>
pub fn try_new_with_any_provider(
provider: &impl AnyProvider
) -> Result<LineSegmenter, SegmenterError>
Creates a new instance using an AnyProvider.
For details on the behavior of this function, see: Self::try_new_unstable
sourcepub fn try_new_with_buffer_provider(
provider: &impl BufferProvider
) -> Result<LineSegmenter, SegmenterError>
pub fn try_new_with_buffer_provider(
provider: &impl BufferProvider
) -> Result<LineSegmenter, SegmenterError>
✨ Enabled with the "serde" feature.
Creates a new instance using a BufferProvider.
For details on the behavior of this function, see: Self::try_new_unstable
sourcepub fn try_new_with_options_unstable<D>(
provider: &D,
options: LineBreakOptions
) -> Result<LineSegmenter, SegmenterError> where
D: DataProvider<LineBreakDataV1Marker> + DataProvider<LstmDataV1Marker> + DataProvider<GraphemeClusterBreakDataV1Marker> + ?Sized,
pub fn try_new_with_options_unstable<D>(
provider: &D,
options: LineBreakOptions
) -> Result<LineSegmenter, SegmenterError> where
D: DataProvider<LineBreakDataV1Marker> + DataProvider<LstmDataV1Marker> + DataProvider<GraphemeClusterBreakDataV1Marker> + ?Sized,
Constructs a LineSegmenter with custom LineBreakOptions.
sourcepub fn try_new_with_options_with_any_provider(
provider: &impl AnyProvider,
options: LineBreakOptions
) -> Result<LineSegmenter, SegmenterError>
pub fn try_new_with_options_with_any_provider(
provider: &impl AnyProvider,
options: LineBreakOptions
) -> Result<LineSegmenter, SegmenterError>
Creates a new instance using an AnyProvider.
For details on the behavior of this function, see: Self::try_new_with_options_unstable
sourcepub fn try_new_with_options_with_buffer_provider(
provider: &impl BufferProvider,
options: LineBreakOptions
) -> Result<LineSegmenter, SegmenterError>
pub fn try_new_with_options_with_buffer_provider(
provider: &impl BufferProvider,
options: LineBreakOptions
) -> Result<LineSegmenter, SegmenterError>
✨ Enabled with the "serde" feature.
Creates a new instance using a BufferProvider.
For details on the behavior of this function, see: Self::try_new_with_options_unstable
sourcepub fn segment_str(
&'l self,
input: &'s str
) -> LineBreakIterator<'l, 's, LineBreakTypeUtf8>ⓘNotable traits for LineBreakIterator<'l, 's, Y>impl<'l, 's, Y> Iterator for LineBreakIterator<'l, 's, Y> where
Y: LineBreakType<'l, 's>, type Item = usize;
pub fn segment_str(
&'l self,
input: &'s str
) -> LineBreakIterator<'l, 's, LineBreakTypeUtf8>ⓘNotable traits for LineBreakIterator<'l, 's, Y>impl<'l, 's, Y> Iterator for LineBreakIterator<'l, 's, Y> where
Y: LineBreakType<'l, 's>, type Item = usize;
Y: LineBreakType<'l, 's>, type Item = usize;
Create a line break iterator for an str (a UTF-8 string).
sourcepub fn segment_utf8(
&'l self,
input: &'s [u8]
) -> LineBreakIterator<'l, 's, LineBreakTypePotentiallyIllFormedUtf8>ⓘNotable traits for LineBreakIterator<'l, 's, Y>impl<'l, 's, Y> Iterator for LineBreakIterator<'l, 's, Y> where
Y: LineBreakType<'l, 's>, type Item = usize;
pub fn segment_utf8(
&'l self,
input: &'s [u8]
) -> LineBreakIterator<'l, 's, LineBreakTypePotentiallyIllFormedUtf8>ⓘNotable traits for LineBreakIterator<'l, 's, Y>impl<'l, 's, Y> Iterator for LineBreakIterator<'l, 's, Y> where
Y: LineBreakType<'l, 's>, type Item = usize;
Y: LineBreakType<'l, 's>, type Item = usize;
Create a line break iterator for a potentially ill-formed UTF-8 string.
Invalid characters are treated as REPLACEMENT CHARACTER (U+FFFD).
sourcepub fn segment_latin1(
&'l self,
input: &'s [u8]
) -> LineBreakIterator<'l, 's, LineBreakTypeLatin1>ⓘNotable traits for LineBreakIterator<'l, 's, Y>impl<'l, 's, Y> Iterator for LineBreakIterator<'l, 's, Y> where
Y: LineBreakType<'l, 's>, type Item = usize;
pub fn segment_latin1(
&'l self,
input: &'s [u8]
) -> LineBreakIterator<'l, 's, LineBreakTypeLatin1>ⓘNotable traits for LineBreakIterator<'l, 's, Y>impl<'l, 's, Y> Iterator for LineBreakIterator<'l, 's, Y> where
Y: LineBreakType<'l, 's>, type Item = usize;
Y: LineBreakType<'l, 's>, type Item = usize;
Create a line break iterator for a Latin-1 (8-bit) string.
sourcepub fn segment_utf16(
&'l self,
input: &'s [u16]
) -> LineBreakIterator<'l, 's, LineBreakTypeUtf16>ⓘNotable traits for LineBreakIterator<'l, 's, Y>impl<'l, 's, Y> Iterator for LineBreakIterator<'l, 's, Y> where
Y: LineBreakType<'l, 's>, type Item = usize;
pub fn segment_utf16(
&'l self,
input: &'s [u16]
) -> LineBreakIterator<'l, 's, LineBreakTypeUtf16>ⓘNotable traits for LineBreakIterator<'l, 's, Y>impl<'l, 's, Y> Iterator for LineBreakIterator<'l, 's, Y> where
Y: LineBreakType<'l, 's>, type Item = usize;
Y: LineBreakType<'l, 's>, type Item = usize;
Create a line break iterator for a UTF-16 string.
Auto Trait Implementations
impl RefUnwindSafe for LineSegmenter
impl Send for LineSegmenter
impl Sync for LineSegmenter
impl Unpin for LineSegmenter
impl UnwindSafe for LineSegmenter
Blanket Implementations
sourceimpl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
const: unstable · sourcefn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more