77 #define WS_UTF8_ENC_1_M 0xffffff80    78 #define WS_UTF8_ENC_2_M 0xfffff800    79 #define WS_UTF8_ENC_3_M 0xffff0000    80 #define WS_UTF8_ENC_4_M 0xffe00000    81 #define WS_UTF8_ENC_5_M 0xfc000000    82 #define WS_UTF8_ENC_6_M 0x80000000    99 #define WS_UTF8_ENC_C_BITS  0x80   102 #define WS_UTF8_CONT_DATA_MASK  0x3f   108 #define WS_UTF8_ENC_TYPE(ch)            \   109     (((ch) & WS_UTF8_ENC_1_M) == 0      \   111      : (((ch) & WS_UTF8_ENC_2_M) == 0       \   113        : (((ch) & WS_UTF8_ENC_3_M) == 0     \   115          : (((ch) & WS_UTF8_ENC_4_M) == 0   \   117            : (((ch) & WS_UTF8_ENC_5_M) == 0 \   119              : (((ch) & WS_UTF8_ENC_6_M) == 0   \   125 #define WS_UTF8_DEC_1_M 0x80   126 #define WS_UTF8_DEC_2_M 0xe0   127 #define WS_UTF8_DEC_3_M 0xf0   128 #define WS_UTF8_DEC_4_M 0xf8   129 #define WS_UTF8_DEC_5_M 0xfc   130 #define WS_UTF8_DEC_6_M 0xfe   132 #define WS_UTF8_DEC_1_V 0x00   133 #define WS_UTF8_DEC_2_V 0xc0   134 #define WS_UTF8_DEC_3_V 0xe0   135 #define WS_UTF8_DEC_4_V 0xf0   136 #define WS_UTF8_DEC_5_V 0xf8   137 #define WS_UTF8_DEC_6_V 0xfc   154 #define WS_UTF8_DEC_C_M 0xc0   155 #define WS_UTF8_DEC_C_V 0x80   161 #define WS_UTF8_DEC_TYPE(b)                 \   162     (((b) & WS_UTF8_DEC_1_M) == WS_UTF8_DEC_1_V         \   164      : (((b) & WS_UTF8_DEC_2_M) == WS_UTF8_DEC_2_V      \   166        : (((b) & WS_UTF8_DEC_3_M) == WS_UTF8_DEC_3_V        \   168          : (((b) & WS_UTF8_DEC_4_M) == WS_UTF8_DEC_4_V      \   170            : (((b) & WS_UTF8_DEC_5_M) == WS_UTF8_DEC_5_V    \   172              : (((b) & WS_UTF8_DEC_6_M) == WS_UTF8_DEC_6_V  \   178 #define WS_UTF8_DEC_C_P(b) (((b) & WS_UTF8_DEC_C_M) == WS_UTF8_DEC_C_V)   205         ws_fatal(
"ws_utf8_append_char(): 0x%lx is not a valid UTF-8 character",
   215     for (i = num_bytes - 1; i > 0; i--) {
   226     string->len += num_bytes;
   234                    size_t *strlen_return)
   236     unsigned int num_bytes, i;
   249         for (i = 1; i < num_bytes; i++)
   260         *strlen_return = strlen;
   279     string->num_chars = 0;
   283     if (string->
data == NULL)
   287     string->num_chars = num_chars;
   297     unsigned int num_bytes, i;
   301     if (pos < 0 || pos >= string->
len)
   305     data = 
string->data + pos;
   312     if (pos + num_bytes > string->
len)
   320     for (i = 1; i < num_bytes; i++) {
   326     *posp = pos + num_bytes;
   333                                  unsigned char unknown_char,
   347     for (i = 0; i < 
string->num_chars; i++) {
   351             ws_fatal(
"ws_utf8_to_latin1_cstr(): internal inconsistency");
   354             cstr[i] = unknown_char;
   356             cstr[i] = (
unsigned char) ch;
   362         *len_return = 
string->num_chars;
 void ws_fatal(char *fmt,...)
void * ws_calloc(size_t num, size_t size)
static unsigned char utf8_hibits[7]
void * ws_realloc(void *ptr, size_t size)
#define WS_UTF8_DEC_TYPE(b)
void ws_utf8_free(WsUtf8String *string)
static unsigned char utf8_hidata_masks[7]
#define WS_UTF8_ENC_C_BITS
#define WS_UTF8_CONT_DATA_MASK
int ws_utf8_append_char(WsUtf8String *string, unsigned long ch)
WsUtf8String * ws_utf8_alloc()
int ws_utf8_set_data(WsUtf8String *string, const unsigned char *data, size_t len)
void ws_utf8_free_data(unsigned char *data)
void * ws_memdup(const void *ptr, size_t size)
#define WS_UTF8_DEC_C_P(b)
int ws_utf8_get_char(const WsUtf8String *string, unsigned long *ch_return, size_t *posp)
int ws_utf8_verify(const unsigned char *data, size_t len, size_t *strlen_return)
#define WS_UTF8_ENC_TYPE(ch)
void * ws_malloc(size_t size)
unsigned char * ws_utf8_to_latin1(const WsUtf8String *string, unsigned char unknown_char, size_t *len_return)