Kannel: Open Source WAP and SMS gateway  svn-r5335
wsutf8.h
Go to the documentation of this file.
1 /* ====================================================================
2  * The Kannel Software License, Version 1.0
3  *
4  * Copyright (c) 2001-2018 Kannel Group
5  * Copyright (c) 1998-2001 WapIT Ltd.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  * notice, this list of conditions and the following disclaimer.
14  *
15  * 2. Redistributions in binary form must reproduce the above copyright
16  * notice, this list of conditions and the following disclaimer in
17  * the documentation and/or other materials provided with the
18  * distribution.
19  *
20  * 3. The end-user documentation included with the redistribution,
21  * if any, must include the following acknowledgment:
22  * "This product includes software developed by the
23  * Kannel Group (http://www.kannel.org/)."
24  * Alternately, this acknowledgment may appear in the software itself,
25  * if and wherever such third-party acknowledgments normally appear.
26  *
27  * 4. The names "Kannel" and "Kannel Group" must not be used to
28  * endorse or promote products derived from this software without
29  * prior written permission. For written permission, please
30  * contact org@kannel.org.
31  *
32  * 5. Products derived from this software may not be called "Kannel",
33  * nor may "Kannel" appear in their name, without prior written
34  * permission of the Kannel Group.
35  *
36  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39  * DISCLAIMED. IN NO EVENT SHALL THE KANNEL GROUP OR ITS CONTRIBUTORS
40  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
41  * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
42  * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
43  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
44  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
45  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
46  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
47  * ====================================================================
48  *
49  * This software consists of voluntary contributions made by many
50  * individuals on behalf of the Kannel Group. For more information on
51  * the Kannel Group, please see <http://www.kannel.org/>.
52  *
53  * Portions of this software are based upon software originally written at
54  * WapIT Ltd., Helsinki, Finland for the Kannel project.
55  */
56 
57 /*
58  *
59  * wsutf8.h
60  *
61  * Author: Markku Rossi <mtr@iki.fi>
62  *
63  * Copyright (c) 1999-2000 WAPIT OY LTD.
64  * All rights reserved.
65  *
66  * Functions to manipulate UTF-8 encoded strings.
67  *
68  * Specification: RFC-2279
69  *
70  */
71 
72 #ifndef WSUTF8_H
73 #define WSUTF8_H
74 
75 /********************* Types and definitions ****************************/
76 
77 /* UTF-8 string handle. */
79 {
80  /* The length of the UTF-8 encoded `data'. */
81  size_t len;
82 
83  /* The UTF-8 encoded data. */
84  unsigned char *data;
85 
86  /* The number of characters in the string. */
87  size_t num_chars;
88 };
89 
91 
92 /********************* Global functions *********************************/
93 
94 /* Allocate an empty UTF-8 string. The function returns NULL if the
95  allocation failed (out of memory). */
96 WsUtf8String *ws_utf8_alloc(void);
97 
98 /* Free a UTF-8 encoded string. */
99 void ws_utf8_free(WsUtf8String *string);
100 
101 /* Append the character `ch' to the string `string'. The function
102  returns 1 if the operation was successful or 0 otherwise (out of
103  memory). */
104 int ws_utf8_append_char(WsUtf8String *string, unsigned long ch);
105 
106 /* Verify the UTF-8 encoded string `data' containing `len' bytes of
107  data. The function returns 1 if the `data' is correctly encoded
108  and 0 otherwise. If the argument `strlen_return' is not NULL, it
109  is set to the number of characters in the string. */
110 int ws_utf8_verify(const unsigned char *data, size_t len,
111  size_t *strlen_return);
112 
113 /* Set UTF-8 encoded data `data', `len' to the string `string'. The
114  function returns 1 if the data was UTF-8 encoded and 0 otherwise
115  (malformed data or out of memory). The function frees the possible
116  old data from `string'. */
117 int ws_utf8_set_data(WsUtf8String *string, const unsigned char *data,
118  size_t len);
119 
120 /* Get a character from the UTF-8 string `string'. The argument
121  `posp' gives the index of the character in the UTF-8 encoded data.
122  It is not the sequence number of the character. It is its starting
123  position within the UTF-8 encoded data. The argument `posp' is
124  updated to point to the beginning of the next character within the
125  data. The character is returned in `ch_return'. The function
126  returns 1 if the operation was successful or 0 otherwise (index
127  `posp' was invalid or there were no more characters in the
128  string). */
129 int ws_utf8_get_char(const WsUtf8String *string, unsigned long *ch_return,
130  size_t *posp);
131 
132 /* Convert the UTF-8 encoded string `string' to null-terminated ISO
133  8859/1 (ISO latin1) string. Those characters of `string' which
134  cannot be represented in latin1 are replaced with the character
135  `unknown_char'. If the argument `len_return' is not NULL, it is
136  set to contain the length of the returned string (excluding the
137  trailing null-character). The function returns a pointer to the
138  string or NULL if the operation failed (out of memory). The
139  returned string must be freed with the ws_utf8_free_data()
140  function. */
141 unsigned char *ws_utf8_to_latin1(const WsUtf8String *string,
142  unsigned char unknown_char,
143  size_t *len_return);
144 
145 /* Free a string returned by the ws_utf8_to_latin1()
146  function. */
147 void ws_utf8_free_data(unsigned char *data);
148 
149 #endif /* not WSUTF8_H */
size_t len
Definition: wsutf8.h:81
WsUtf8String * ws_utf8_alloc(void)
Definition: wsutf8.c:182
unsigned char * data
Definition: wsutf8.h:84
int ws_utf8_get_char(const WsUtf8String *string, unsigned long *ch_return, size_t *posp)
Definition: wsutf8.c:293
void ws_utf8_free(WsUtf8String *string)
Definition: wsutf8.c:188
void ws_utf8_free_data(unsigned char *data)
Definition: wsutf8.c:368
unsigned char * ws_utf8_to_latin1(const WsUtf8String *string, unsigned char unknown_char, size_t *len_return)
Definition: wsutf8.c:332
int ws_utf8_set_data(WsUtf8String *string, const unsigned char *data, size_t len)
Definition: wsutf8.c:266
int ws_utf8_verify(const unsigned char *data, size_t len, size_t *strlen_return)
Definition: wsutf8.c:233
int ws_utf8_append_char(WsUtf8String *string, unsigned long ch)
Definition: wsutf8.c:198
size_t num_chars
Definition: wsutf8.h:87
See file LICENSE for details about the license agreement for using, modifying, copying or deriving work from this software.