A core class of utf8 encoded string functions.
More...
#include <unicode.h>
|
static unsigned | ccount (const char *string, ucs4_t character) |
| Count occurrences of a unicode character in string. More...
|
|
static size_t | chars (const unicode_t string) |
| How many chars are required to encode a given wchar string. More...
|
|
static size_t | chars (ucs4_t character) |
| How many chars are required to encode a given unicode character. More...
|
|
static ucs4_t | codepoint (const char *encoded) |
| Convert a utf8 encoded codepoint to a ucs4 character value. More...
|
|
static size_t | count (const char *string) |
| Count utf8 encoded ucs4 codepoints in string. More...
|
|
static const char * | find (const char *string, ucs4_t character, size_t start=0) |
| Find first occurrence of character in string. More...
|
|
static ucs4_t | get (const char *cp) |
| Get a unicode character from a character protocol. More...
|
|
static char * | offset (char *string, ssize_t position) |
| Get codepoint offset in a string. More...
|
|
static size_t | pack (unicode_t unicode, const char *cp, size_t len) |
| Convert a utf8 string into a unicode data buffer. More...
|
|
static void | put (ucs4_t character, char *buf) |
| Push a unicode character to a character protocol. More...
|
|
static const char * | rfind (const char *string, ucs4_t character, size_t end=(size_t) -1l) |
| Find last occurrence of character in string. More...
|
|
static unsigned | size (const char *codepoint) |
| Compute character size of utf8 string codepoint. More...
|
|
static ucs4_t * | udup (const char *string) |
| Dup a utf8 string into a ucs4_t string.
|
|
static size_t | unpack (const unicode_t string, char *text, size_t size) |
| Convert a unicode string into utf8. More...
|
|
static ucs2_t * | wdup (const char *string) |
| Dup a utf8 string into a ucs2_t representation.
|
|
|
static const char * | nil |
| A convenient NULL pointer value.
|
|
static const unsigned | ucsize |
| Size of "unicode_t" character codes, may not be ucs4_t size.
|
|
A core class of utf8 encoded string functions.
This is a foundation for all utf8 string processing.
- Author
- David Sugar
Definition at line 67 of file unicode.h.
◆ ccount()
static unsigned ucommon::utf8::ccount |
( |
const char * |
string, |
|
|
ucs4_t |
character |
|
) |
| |
|
static |
Count occurrences of a unicode character in string.
- Parameters
-
string | to search in. |
character | code to search for. |
- Returns
- count of occurrences.
◆ chars() [1/2]
static size_t ucommon::utf8::chars |
( |
const unicode_t |
string | ) |
|
|
static |
How many chars are required to encode a given wchar string.
- Parameters
-
- Returns
- number of chars required to encode given string.
◆ chars() [2/2]
static size_t ucommon::utf8::chars |
( |
ucs4_t |
character | ) |
|
|
static |
How many chars are required to encode a given unicode character.
- Parameters
-
- Returns
- number of chars required to encode given character.
◆ codepoint()
static ucs4_t ucommon::utf8::codepoint |
( |
const char * |
encoded | ) |
|
|
static |
Convert a utf8 encoded codepoint to a ucs4 character value.
- Parameters
-
- Returns
- ucs4 character or 0 if invalid.
- Examples
- unicode.cpp.
◆ count()
static size_t ucommon::utf8::count |
( |
const char * |
string | ) |
|
|
static |
Count utf8 encoded ucs4 codepoints in string.
- Parameters
-
- Returns
- codepoint count, 0 if empty or invalid.
- Examples
- unicode.cpp.
◆ find()
static const char* ucommon::utf8::find |
( |
const char * |
string, |
|
|
ucs4_t |
character, |
|
|
size_t |
start = 0 |
|
) |
| |
|
static |
Find first occurrence of character in string.
- Parameters
-
string | to search in. |
character | code to search for. |
start | offset in string in codepoints. |
- Returns
- pointer to first instance or NULL if not found.
◆ get()
static ucs4_t ucommon::utf8::get |
( |
const char * |
cp | ) |
|
|
static |
Get a unicode character from a character protocol.
- Parameters
-
cp | buffer of character protocol to read from. |
- Returns
- unicode character or EOF on error.
◆ offset()
static char* ucommon::utf8::offset |
( |
char * |
string, |
|
|
ssize_t |
position |
|
) |
| |
|
static |
Get codepoint offset in a string.
- Parameters
-
string | of utf8 data. |
position | of codepoint in string, negative offsets are from tail. |
- Returns
- offset of codepoint or NULL if invalid.
◆ pack()
static size_t ucommon::utf8::pack |
( |
unicode_t |
unicode, |
|
|
const char * |
cp, |
|
|
size_t |
len |
|
) |
| |
|
static |
Convert a utf8 string into a unicode data buffer.
- Parameters
-
unicode | data buffer. |
cp | buffer of character protocol to pack from. |
len | size of unicode data buffer in codepoints. |
- Returns
- number of code points converted.
◆ put()
static void ucommon::utf8::put |
( |
ucs4_t |
character, |
|
|
char * |
buf |
|
) |
| |
|
static |
Push a unicode character to a character protocol.
- Parameters
-
character | to push to file. |
buf | buffer of character protocol to push character to. |
- Returns
- unicode character or EOF on error.
◆ rfind()
static const char* ucommon::utf8::rfind |
( |
const char * |
string, |
|
|
ucs4_t |
character, |
|
|
size_t |
end = (size_t) -1l |
|
) |
| |
|
static |
Find last occurrence of character in string.
- Parameters
-
string | to search in. |
character | code to search for. |
end | offset to start from in codepoints. |
- Returns
- pointer to last instance or NULL if not found.
◆ size()
static unsigned ucommon::utf8::size |
( |
const char * |
codepoint | ) |
|
|
static |
Compute character size of utf8 string codepoint.
- Parameters
-
- Returns
- size of codepoint as utf8 encoded data, 0 if invalid.
- Examples
- unicode.cpp.
◆ unpack()
static size_t ucommon::utf8::unpack |
( |
const unicode_t |
string, |
|
|
char * |
text, |
|
|
size_t |
size |
|
) |
| |
|
static |
Convert a unicode string into utf8.
- Parameters
-
string | of unicode data to pack. |
text | buffer of character protocol to put data into. |
- Returns
- number of code points converted.
The documentation for this class was generated from the following file: