|
NAME
unicode::bidi, unicode::bidi_calc, unicode::bidi_calc_types, unicode::bidi_reorder, unicode::bidi_cleanup, unicode::bidi_logical_order, unicode::bidi_combinings, unicode::bidi_needs_embed, unicode::bidi_embed, unicode::bidi_embed_paragraph_level, unicode::bidi_get_direction, unicode::bidi_override - unicode bi-directional algorithm
SYNOPSIS
#include <courier-unicode.h>
struct unicode::bidi_calc_types { bidi_calc_types(const std::u32string & string); std::vector<unicode_bidi_type_t> types ; void setbnl(std::u32string & string); };
std::tuple<std::vector<unicode_bidi_level_t>, struct unicode_bidi_direction> unicode::bidi_calc(const unicode::bidi_calc_types &ustring);
std::tuple<std::vector<unicode_bidi_level_t>, struct unicode_bidi_direction> unicode::bidi_calc(const unicode::bidi_calc_types &ustring, unicode_bidi_level_t embedding_level);
int unicode::bidi_reorder(std::u32string &string, std::vector<unicode_bidi_level_t> &embedding_level, const std::function<void (size_t, size_t)> &reorder_callback=[](size_t, size_t){}, size_t starting_pos=0, size_t n=(size_t)-1);
void unicode::bidi_reorder(std::vector<unicode_bidi_level_t> &embedding_level, const std::function<void (size_t, size_t)> &reorder_callback=[](size_t, size_t){}, size_t starting_pos=0, size_t n=(size_t)-1);
void unicode::bidi_cleanup(std::u32string &string, const std::function<void (size_t)> &removed_callback=[](size_t){}, int cleanup_options=0);
int unicode::bidi_cleanup(std::u32string &string, std::vector <unicode_bidi_level_t> &levels, const std::function<void (size_t)> &removed_callback=[](size_t){}, int cleanup_options=0);
int unicode::bidi_cleanup(std::u32string &string, std::vector <unicode_bidi_level_t> &levels, const std::function<void (size_t)> &removed_callback, int cleanup_options, size_t starting_pos, size_t n);
int unicode::bidi_logical_order(std::u32string &string, std::vector <unicode_bidi_level_t> &levels, unicode_bidi_level_t paragraph_embedding, const std::function<void (size_t, 
size_t)> &reorder_callback=[](size_t, size_t){}, size_t starting_pos=0, size_t n=(size_t)-1);
void unicode::bidi_combinings(const std::u32string &string, const std::vector <unicode_bidi_level_t> &levels, const std::function <void (unicode_bidi_level_t level, size_t level_start, size_t n_chars, size_t comb_start, size_t n_comb_chars)> &callback);
void unicode::bidi_combinings(const std::u32string &string, const std::function <void (unicode_bidi_level_t level, size_t level_start, size_t n_chars, size_t comb_start, size_t n_comb_chars)> &callback);
void unicode::bidi_logical_order(std::vector <unicode_bidi_level_t> &levels, unicode_bidi_level_t paragraph_embedding, const std::function<void (size_t, size_t)> &reorder_callback, size_t starting_pos=0, size_t n=(size_t)-1);
bool unicode::bidi_needs_embed(const std::u32string &string, const std::vector <unicode_bidi_level_t> &levels, const unicode_bidi_level_t *paragraph_embedding=NULL, size_t starting_pos=0, size_t n=(size_t)-1);
int unicode::bidi_embed(const std::u32string &string, const std::vector <unicode_bidi_level_t> &levels, unicode_bidi_level_t paragraph_embedding, const std::function<void (const char32_t *, size_t, bool)> &callback);
std::u32string unicode::bidi_embed(const std::u32string &string, const std::vector <unicode_bidi_level_t> &levels, unicode_bidi_level_t paragraph_embedding);
char32_t unicode::bidi_embed_paragraph_level(const std::u32string &string, unicode_bidi_level_t paragraph_embedding);
unicode_bidi_direction unicode::bidi_get_direction(const std::u32string &string, size_t starting_pos=0, size_t n=(size_t)-1);
std::u32string unicode::bidi_override(const std::u32string &string, unicode_bidi_level_t direction, int cleanup_options=0);
DESCRIPTION
These functions implement the C++ interface for the Unicode Bi-Directional algorithm[1]. See the description of the underlying unicode_bidi(3) C library API for more information. C++ specific notes:
•unicode::bidi_calc returns the directional
embedding value buffer and the calculated paragraph embedding level. Its
ustring parameter is implicitly converted from a std::u32string:
std::u32string text;
auto [levels, direction]=unicode::bidi_calc(text);

std::u32string text;
unicode::bidi_calc_types types{text};
types.setbnl(text); // Optional
// types.types is a std::vector of unicode_bidi_type_t values
auto [levels, direction]=unicode::bidi_calc(types);

Note: In all cases the std::u32string cannot be a temporary object, and it must remain in scope until unicode::bidi_calc() returns.
•Several C functions provide a
“dry-run” mode by passing a NULL pointer. The C++ API provides
separate overloads, with and without the nullable parameter.
•Several C functions accept a nullable function
pointer, with the NULL function pointer specifying no callback. The C++
functions have a std::function parameter with a default do-nothing
closure.
•Several C functions accept two parameters, a
Unicode character pointer and the embedding level buffer, and a single
parameter that specifies the size of both. The equivalent C++ function takes
two discrete parameters, a std::u32string and a std::vector, and returns an
int: a negative value if their sizes differ, or 0 if their sizes match and
the requested function completes. The unicode::bidi_embed overload that
returns a std::u32string returns an empty string in case of a mismatch.
•unicode::bidi_reorder reorders the entire
string and its embedding_levels by default. The optional
starting_pos and n parameters limit the reordering to the
indicated subset of the original string (specified as the starting position
offset index, and the number of characters).
•unicode::bidi_reorder,
unicode::bidi_cleanup, unicode::bidi_logical_order,
unicode::bidi_needs_embed and unicode::bidi_get_direction take
two optional parameters (defaulted values or overloaded) specifying an
optional starting position and number of characters that define a subset of
the original string that gets reordered, cleaned up, or has its direction
determined.
This unicode::bidi_cleanup overload does not trim the passed-in string and embedding level buffer, since it affects only a subset of the string. The number of times the removed-character callback gets invoked indicates by how many characters the substring should be trimmed.
•unicode::bidi_override returns a copy of the
passed-in string, modified as follows:
•unicode::bidi_cleanup() is applied with
the specified, or defaulted, cleanup_options
•Either an LRO or an RLO override marker gets
prepended to the Unicode string, forcing the entire string to be interpreted
in a single rendering direction, when processed by the Unicode bi-directional
algorithm.
unicode::bidi_override makes it possible to use a Unicode-aware application or algorithm in a context that only works with text that's always displayed in a fixed direction, allowing graceful handling of input containing bi-directional text.
unicode::literals namespace
using namespace unicode::literals;
std::u32string foo(std::u32string bar) { return bar + LRO; }
This namespace contains the following constexpr definitions:
•char32_t arrays with literal Unicode character
strings containing Unicode directional, isolate, and override markers, like
LRO, RLO and others.
•CLEANUP_EXTRA, CLEANUP_BNL, and CLEANUP_CANONICAL
options for unicode::bidi_cleanup().
SEE ALSO
courier-unicode(7), unicode_bidi(3).
AUTHOR
Sam Varshavchik (Author)
NOTES
1. Unicode Bi-Directional algorithm: https://www.unicode.org/reports/tr9/tr9-42.html
Visit the GSP FreeBSD Man Page Interface. |