Kannel: Open Source WAP and SMS gateway  svn-r5335
charset.c File Reference
#include "gwlib/gwlib.h"
#include "gwlib/latin1_to_gsm.h"

Go to the source code of this file.

Data Structures

struct  alias_t
 

Macros

#define NRP   '?'
 

Typedefs

typedef struct alias_t alias_t
 

Functions

void charset_init ()
 
void charset_shutdown ()
 
void charset_gsm_to_utf8 (Octstr *ostr)
 
void charset_utf8_to_gsm (Octstr *ostr)
 
void charset_gsm_to_latin1 (Octstr *ostr)
 
void charset_latin1_to_gsm (Octstr *ostr)
 
void charset_gsm_to_nrc_iso_21_german (Octstr *ostr)
 
void charset_nrc_iso_21_german_to_gsm (Octstr *ostr)
 
int charset_gsm_truncate (Octstr *gsm, long max)
 
int charset_to_utf8 (Octstr *from, Octstr **to, Octstr *charset_from)
 
int charset_from_utf8 (Octstr *utf8, Octstr **to, Octstr *charset_to)
 
int charset_convert (Octstr *string, char *charset_from, char *charset_to)
 

Variables

struct {
   int   gsmesc
 
   int   latin1
 
gsm_esctolatin1 []
 
struct {
   int   gsmesc
 
   int   unichar
 
gsm_esctouni []
 
static const unsigned char gsm_to_latin1 [128]
 
static const int gsm_to_unicode [128]
 
alias_t chars_aliases []
 

Macro Definition Documentation

◆ NRP

#define NRP   '?'

Definition at line 73 of file charset.c.

Referenced by charset_utf8_to_gsm().

Typedef Documentation

◆ alias_t

typedef struct alias_t alias_t

Definition at line 182 of file charset.c.

Function Documentation

◆ charset_convert()

int charset_convert ( Octstr *  string,
char *  charset_from,
char *  charset_to 
)

Definition at line 589 of file charset.c.

References debug(), error(), octstr_append_data(), octstr_get_cstr, octstr_len(), octstr_truncate(), and warning().

Referenced by charset_processing(), convert_addr_from_pdu(), convert_charset(), data_sm_to_msg(), handle_mo_dcs(), handle_mt_dcs(), httpsmsc_send(), init_batch(), msg_to_pdu(), normalize_charset(), obey_request_thread(), parse_attr_value(), parse_text(), pdu_to_msg(), sms_charset_processing(), soap_msgdata_attribute(), soap_msgdata_deps(), and soap_o2o_msgdata_attribute().

590 {
591 #if HAVE_ICONV
592  char *from_buf, *to_buf, *pointer;
593  size_t inbytesleft, outbytesleft, ret;
594  iconv_t cd;
595 
596  if (!charset_from || !charset_to || !string) /* sanity check */
597  return -1;
598 
599  if (octstr_len(string) < 1 || strcasecmp(charset_from, charset_to) == 0)
600  return 0; /* we are done, nothing to convert */
601 
602  cd = iconv_open(charset_to, charset_from);
603  /* Did I succeed in getting a conversion descriptor ? */
604  if (cd == (iconv_t)(-1)) {
605  /* I guess not */
606  error(0,"Failed to convert string from <%s> to <%s> - probably broken type names.",
607  charset_from, charset_to);
608  return -1;
609  }
610 
611  from_buf = octstr_get_cstr(string);
612  inbytesleft = octstr_len(string);
613  /* allocate max sized buffer, assuming target encoding may be 4 byte unicode */
614  outbytesleft = inbytesleft * 4;
615  pointer = to_buf = gw_malloc(outbytesleft);
616 
617  do {
618  ret = iconv(cd, (ICONV_CONST char**) &from_buf, &inbytesleft, &pointer, &outbytesleft);
619  if(ret == -1) {
620  long tmp;
621  /* the conversion failed somewhere */
622  switch(errno) {
623  case E2BIG: /* no space in output buffer */
624  debug("charset", 0, "outbuf to small, realloc.");
625  tmp = pointer - to_buf;
626  to_buf = gw_realloc(to_buf, tmp + inbytesleft * 4);
627  outbytesleft += inbytesleft * 4;
628  pointer = to_buf + tmp;
629  ret = 0;
630  break;
631  case EILSEQ: /* invalid multibyte sequence */
632  case EINVAL: /* incomplete multibyte sequence */
633  warning(0, "Invalid/Incomplete multibyte sequence at position %d, skeep it.",
634  (int)(from_buf - octstr_get_cstr(string)));
635  /* skeep char and try next */
636  if (outbytesleft == 0) {
637  /* buffer to small */
638  tmp = pointer - to_buf;
639  to_buf = gw_realloc(to_buf, tmp + inbytesleft * 4);
640  outbytesleft += inbytesleft * 4;
641  pointer = to_buf + tmp;
642  }
643  pointer[0] = from_buf[0];
644  pointer++;
645  from_buf++;
646  inbytesleft--;
647  outbytesleft--;
648  ret = 0;
649  break;
650  }
651  }
652  } while(inbytesleft && ret == 0); /* stop if error occurs and not handled above */
653 
654  iconv_close(cd);
655 
656  if (ret != -1) {
657  /* conversion succeeded */
658  octstr_truncate(string, 0);
659  octstr_append_data(string, to_buf, pointer - to_buf);
660  if (ret)
661  debug("charset", 0, "charset_convert did %ld non-reversible conversions", (long) ret);
662  ret = 0;
663  } else
664  error(errno,"Failed to convert string from <%s> to <%s>.", charset_from, charset_to);
665 
666  if (errno == EILSEQ) {
667  debug("charset_convert", 0, "Found an invalid multibyte sequence at position <%d>",
668  (int)(from_buf - octstr_get_cstr(string)));
669  }
670  gw_free(to_buf);
671  return ret;
672 #endif
673  /* no convertion done due to not having iconv */
674  return -1;
675 }
void error(int err, const char *fmt,...)
Definition: log.c:648
void octstr_append_data(Octstr *ostr, const char *data, long len)
Definition: octstr.c:1497
#define octstr_get_cstr(ostr)
Definition: octstr.h:233
void warning(int err, const char *fmt,...)
Definition: log.c:660
long octstr_len(const Octstr *ostr)
Definition: octstr.c:342
void debug(const char *place, int err, const char *fmt,...)
Definition: log.c:726
void octstr_truncate(Octstr *ostr, int new_len)
Definition: octstr.c:1327

◆ charset_from_utf8()

int charset_from_utf8 ( Octstr *  utf8,
Octstr **  to,
Octstr *  charset_to 
)

Definition at line 558 of file charset.c.

References handler, octstr_create_from_data, octstr_get_cstr, and octstr_len().

Referenced by octstr_recode().

559 {
560  int ret;
561  xmlCharEncodingHandlerPtr handler = NULL;
562  xmlBufferPtr frombuffer = NULL;
563  xmlBufferPtr tobuffer = NULL;
564 
565  handler = xmlFindCharEncodingHandler(octstr_get_cstr(charset_to));
566  if (handler == NULL)
567  return -2;
568 
569  /* Build the libxml buffers for the transcoding. */
570  tobuffer = xmlBufferCreate();
571  frombuffer = xmlBufferCreate();
572  xmlBufferAdd(frombuffer, (unsigned char*)octstr_get_cstr(utf8), octstr_len(utf8));
573 
574  ret = xmlCharEncOutFunc(handler, tobuffer, frombuffer);
575  if (ret < -2)
576  /* Libxml seems to be here a little uncertain what would be the
577  * return code -3, so let's make it -1. Ugly thing, indeed. --tuo */
578  ret = -1;
579 
580  *to = octstr_create_from_data((char*)tobuffer->content, tobuffer->use);
581 
582  /* Memory cleanup. */
583  xmlBufferFree(tobuffer);
584  xmlBufferFree(frombuffer);
585 
586  return ret;
587 }
#define octstr_get_cstr(ostr)
Definition: octstr.h:233
long octstr_len(const Octstr *ostr)
Definition: octstr.c:342
SMPP_PDU *(* handler)(ESME *, SMPP_PDU *)
Definition: drive_smpp.c:213
#define octstr_create_from_data(data, len)
Definition: octstr.h:134

◆ charset_gsm_to_latin1()

void charset_gsm_to_latin1 ( Octstr *  ostr)

Definition at line 394 of file charset.c.

References gsm_esctolatin1, gsm_to_latin1, gsmesc, octstr_delete(), octstr_get_char(), octstr_len(), and octstr_set_char().

Referenced by main().

395 {
396  long pos, len;
397 
398  len = octstr_len(ostr);
399  for (pos = 0; pos < len; pos++) {
400  int c, new, i;
401 
402  c = octstr_get_char(ostr, pos);
403  if (c == 27 && pos + 1 < len) {
404  /* GSM escape code. Delete it, then process the next
405  * character specially. */
406  octstr_delete(ostr, pos, 1);
407  len--;
408  c = octstr_get_char(ostr, pos);
409  for (i = 0; gsm_esctolatin1[i].gsmesc >= 0; i++) {
410  if (gsm_esctolatin1[i].gsmesc == c)
411  break;
412  }
413  if (gsm_esctolatin1[i].gsmesc == c)
414  new = gsm_esctolatin1[i].latin1;
415  else if (c < 128)
416  new = gsm_to_latin1[c];
417  else
418  continue;
419  } else if (c < 128) {
420  new = gsm_to_latin1[c];
421  } else {
422  continue;
423  }
424  if (new != c)
425  octstr_set_char(ostr, pos, new);
426  }
427 }
static const struct @54 gsm_esctolatin1[]
int gsmesc
Definition: charset.c:84
static const unsigned char gsm_to_latin1[128]
Definition: charset.c:128
void octstr_delete(Octstr *ostr1, long pos, long len)
Definition: octstr.c:1527
long octstr_len(const Octstr *ostr)
Definition: octstr.c:342
int octstr_get_char(const Octstr *ostr, long pos)
Definition: octstr.c:406
void octstr_set_char(Octstr *ostr, long pos, int ch)
Definition: octstr.c:415

◆ charset_gsm_to_nrc_iso_21_german()

void charset_gsm_to_nrc_iso_21_german ( Octstr *  ostr)

Definition at line 460 of file charset.c.

References octstr_get_char(), octstr_len(), and octstr_set_char().

Referenced by msg_to_emimsg().

461 {
462  long pos, len;
463  int c, new;
464 
465  len = octstr_len(ostr);
466 
467  for (pos = 0; pos < len; pos++) {
468  c = octstr_get_char(ostr, pos);
469  switch (c) {
470  /* GSM value; NRC value */
471  case 0x5b: new = 0x5b; break; /* Ä */
472  case 0x5c: new = 0x5c; break; /* Ö */
473  case 0x5e: new = 0x5d; break; /* Ü */
474  case 0x7b: new = 0x7b; break; /* ä */
475  case 0x7c: new = 0x7c; break; /* ö */
476  case 0x7e: new = 0x7d; break; /* ü */
477  case 0x1e: new = 0x7e; break; /* ß */
478  case 0x5f: new = 0x5e; break; /* § */
479  default: new = c;
480  }
481  if (new != c)
482  octstr_set_char(ostr, pos, new);
483  }
484 }
long octstr_len(const Octstr *ostr)
Definition: octstr.c:342
int octstr_get_char(const Octstr *ostr, long pos)
Definition: octstr.c:406
void octstr_set_char(Octstr *ostr, long pos, int ch)
Definition: octstr.c:415

◆ charset_gsm_to_utf8()

void charset_gsm_to_utf8 ( Octstr *  ostr)

Convert an octet string in GSM format to UTF-8. Every GSM character can be represented in Unicode, hence nothing will be lost. Escaped characters will be translated into the appropriate UTF-8 characters.

Definition at line 220 of file charset.c.

References gsm_esctouni, gsm_to_unicode, gsmesc, octstr_append(), octstr_append_char(), octstr_create, octstr_destroy(), octstr_get_char(), octstr_len(), octstr_truncate(), and warning().

Referenced by at2_decode7bituncompressed(), cimd2_accept_message(), convert_addr_from_pdu(), data_sm_to_msg(), extract_msgdata_part_by_coding(), handle_mo_dcs(), handle_operation(), main(), oisd_accept_message(), and pdu_to_msg().

221 {
222  long pos, len;
223  Octstr *newostr;
224 
225  if (ostr == NULL)
226  return;
227 
228  newostr = octstr_create("");
229  len = octstr_len(ostr);
230 
231  for (pos = 0; pos < len; pos++) {
232  int c, i;
233 
234  c = octstr_get_char(ostr, pos);
235  if (c > 127) {
236  warning(0, "Could not convert GSM (0x%02x) to Unicode.", c);
237  continue;
238  }
239 
240  if(c == 27 && pos + 1 < len) {
241  c = octstr_get_char(ostr, ++pos);
242  for (i = 0; gsm_esctouni[i].gsmesc >= 0; i++) {
243  if (gsm_esctouni[i].gsmesc == c)
244  break;
245  }
246  if (gsm_esctouni[i].gsmesc == c) {
247  /* found a value for escaped char */
248  c = gsm_esctouni[i].unichar;
249  } else {
250  /* nothing found, look esc in our table */
251  c = gsm_to_unicode[27];
252  pos--;
253  }
254  } else if (c < 128) {
255  c = gsm_to_unicode[c];
256  }
257  /* unicode to utf-8 */
258  if(c < 128) {
259  /* 0-127 are ASCII chars that need no conversion */
260  octstr_append_char(newostr, c);
261  } else {
262  /* test if it can be converterd into a two byte char */
263  if(c < 0x0800) {
264  octstr_append_char(newostr, ((c >> 6) | 0xC0) & 0xFF); /* add 110xxxxx */
265  octstr_append_char(newostr, (c & 0x3F) | 0x80); /* add 10xxxxxx */
266  } else {
267  /* else we encode with 3 bytes. This only happens in case of euro symbol */
268  octstr_append_char(newostr, ((c >> 12) | 0xE0) & 0xFF); /* add 1110xxxx */
269  octstr_append_char(newostr, (((c >> 6) & 0x3F) | 0x80) & 0xFF); /* add 10xxxxxx */
270  octstr_append_char(newostr, ((c & 0x3F) | 0x80) & 0xFF); /* add 10xxxxxx */
271  }
272  /* There are no 4 bytes encoded characters in GSM charset */
273  }
274  }
275 
276  octstr_truncate(ostr, 0);
277  octstr_append(ostr, newostr);
278  octstr_destroy(newostr);
279 }
static const struct @55 gsm_esctouni[]
void octstr_append(Octstr *ostr1, const Octstr *ostr2)
Definition: octstr.c:1504
void octstr_append_char(Octstr *ostr, int ch)
Definition: octstr.c:1517
int gsmesc
Definition: charset.c:84
static const int gsm_to_unicode[128]
Definition: charset.c:153
void warning(int err, const char *fmt,...)
Definition: log.c:660
void octstr_destroy(Octstr *ostr)
Definition: octstr.c:324
#define octstr_create(cstr)
Definition: octstr.h:125
long octstr_len(const Octstr *ostr)
Definition: octstr.c:342
Definition: octstr.c:118
void octstr_truncate(Octstr *ostr, int new_len)
Definition: octstr.c:1327
int octstr_get_char(const Octstr *ostr, long pos)
Definition: octstr.c:406

◆ charset_gsm_truncate()

int charset_gsm_truncate ( Octstr *  gsm,
long  max 
)

Definition at line 512 of file charset.c.

References gsm, octstr_get_char(), octstr_len(), and octstr_truncate().

Referenced by extract_msgdata_part_by_coding(), msg_to_emimsg(), and packet_encode_message().

513 {
514  if (octstr_len(gsm) > max) {
515  /* If the last GSM character was an escaped character,
516  * then chop off the escape as well as the character. */
517  if (octstr_get_char(gsm, max - 1) == 27)
518  octstr_truncate(gsm, max - 1);
519  else
520  octstr_truncate(gsm, max);
521  return 1;
522  }
523  return 0;
524 }
unsigned char gsm
Definition: smsc_cimd2.c:1025
long octstr_len(const Octstr *ostr)
Definition: octstr.c:342
void octstr_truncate(Octstr *ostr, int new_len)
Definition: octstr.c:1327
int octstr_get_char(const Octstr *ostr, long pos)
Definition: octstr.c:406

◆ charset_init()

void charset_init ( void  )

Definition at line 200 of file charset.c.

References alias_t::alias, chars_aliases, and alias_t::real.

Referenced by gwlib_init().

201 {
202  int i;
203 
204  for (i = 0; chars_aliases[i].real != NULL; i++) {
205  xmlAddEncodingAlias(chars_aliases[i].real,chars_aliases[i].alias);
206  /*debug("encoding",0,"Add encoding for %s",chars_aliases[i].alias);*/
207  }
208 }
alias_t chars_aliases[]
Definition: charset.c:184
char * real
Definition: charset.c:178

◆ charset_latin1_to_gsm()

void charset_latin1_to_gsm ( Octstr *  ostr)

Definition at line 430 of file charset.c.

References gw_assert(), latin1_to_gsm, octstr_get_char(), octstr_insert_data(), octstr_len(), and octstr_set_char().

431 {
432  long pos, len;
433  int c, new;
434  unsigned char esc = 27;
435 
436  len = octstr_len(ostr);
437  for (pos = 0; pos < len; pos++) {
438  c = octstr_get_char(ostr, pos);
439  gw_assert(c >= 0);
440  gw_assert(c <= 256);
441  new = latin1_to_gsm[c];
442  if (new < 0) {
443  /* Escaped GSM code */
444  octstr_insert_data(ostr, pos, (char*) &esc, 1);
445  pos++;
446  len++;
447  new = -new;
448  }
449  if (new != c)
450  octstr_set_char(ostr, pos, new);
451  }
452 }
gw_assert(wtls_machine->packet_to_send !=NULL)
void octstr_insert_data(Octstr *ostr, long pos, const char *data, long len)
Definition: octstr.c:1461
static const int latin1_to_gsm[256]
Definition: latin1_to_gsm.h:4
long octstr_len(const Octstr *ostr)
Definition: octstr.c:342
int octstr_get_char(const Octstr *ostr, long pos)
Definition: octstr.c:406
void octstr_set_char(Octstr *ostr, long pos, int ch)
Definition: octstr.c:415

◆ charset_nrc_iso_21_german_to_gsm()

void charset_nrc_iso_21_german_to_gsm ( Octstr *  ostr)

Definition at line 486 of file charset.c.

References octstr_get_char(), octstr_len(), and octstr_set_char().

Referenced by handle_operation().

487 {
488  long pos, len;
489  int c, new;
490 
491  len = octstr_len(ostr);
492 
493  for (pos = 0; pos < len; pos++) {
494  c = octstr_get_char(ostr, pos);
495  switch (c) {
496  /* NRC value; GSM value */
497  case 0x5b: new = 0x5b; break; /* Ä */
498  case 0x5c: new = 0x5c; break; /* Ö */
499  case 0x5d: new = 0x5e; break; /* Ü */
500  case 0x7b: new = 0x7b; break; /* ä */
501  case 0x7c: new = 0x7c; break; /* ö */
502  case 0x7d: new = 0x7e; break; /* ü */
503  case 0x7e: new = 0x1e; break; /* ß */
504  case 0x5e: new = 0x5f; break; /* § */
505  default: new = c;
506  }
507  if (new != c)
508  octstr_set_char(ostr, pos, new);
509  }
510 }
long octstr_len(const Octstr *ostr)
Definition: octstr.c:342
int octstr_get_char(const Octstr *ostr, long pos)
Definition: octstr.c:406
void octstr_set_char(Octstr *ostr, long pos, int ch)
Definition: octstr.c:415

◆ charset_shutdown()

void charset_shutdown ( void  )

Definition at line 210 of file charset.c.

Referenced by gwlib_shutdown().

/*
 * Counterpart of charset_init(): calls xmlCleanupEncodingAliases() to drop
 * the encoding aliases held by libxml.
 */
void charset_shutdown(void)
{
    xmlCleanupEncodingAliases();
}

◆ charset_to_utf8()

int charset_to_utf8 ( Octstr *  from,
Octstr **  to,
Octstr *  charset_from 
)

Definition at line 526 of file charset.c.

References from, handler, octstr_compare(), octstr_create_from_data, octstr_duplicate, octstr_get_cstr, octstr_imm(), and octstr_len().

Referenced by octstr_recode(), and set_charset().

527 {
528  int ret;
529  xmlCharEncodingHandlerPtr handler = NULL;
530  xmlBufferPtr frombuffer = NULL;
531  xmlBufferPtr tobuffer = NULL;
532 
533  if (octstr_compare(charset_from, octstr_imm("UTF-8")) == 0) {
534  *to = octstr_duplicate(from);
535  return 0;
536  }
537 
538  handler = xmlFindCharEncodingHandler(octstr_get_cstr(charset_from));
539  if (handler == NULL)
540  return -2;
541 
542  /* Build the libxml buffers for the transcoding. */
543  tobuffer = xmlBufferCreate();
544  frombuffer = xmlBufferCreate();
545  xmlBufferAdd(frombuffer, (unsigned char*)octstr_get_cstr(from), octstr_len(from));
546 
547  ret = xmlCharEncInFunc(handler, tobuffer, frombuffer);
548 
549  *to = octstr_create_from_data((char*)tobuffer->content, tobuffer->use);
550 
551  /* Memory cleanup. */
552  xmlBufferFree(tobuffer);
553  xmlBufferFree(frombuffer);
554 
555  return ret;
556 }
#define octstr_get_cstr(ostr)
Definition: octstr.h:233
static Octstr * from
Definition: mtbatch.c:95
Octstr * octstr_imm(const char *cstr)
Definition: octstr.c:283
#define octstr_duplicate(ostr)
Definition: octstr.h:187
long octstr_len(const Octstr *ostr)
Definition: octstr.c:342
SMPP_PDU *(* handler)(ESME *, SMPP_PDU *)
Definition: drive_smpp.c:213
#define octstr_create_from_data(data, len)
Definition: octstr.h:134
int octstr_compare(const Octstr *ostr1, const Octstr *ostr2)
Definition: octstr.c:871

◆ charset_utf8_to_gsm()

void charset_utf8_to_gsm ( Octstr *  ostr)

Convert an octet string in UTF-8 format to GSM 03.38. Because not every UTF-8 character can be converted to GSM 03.38, non-convertible characters are replaced with the NRP character (see the define above). Special characters will be formed into escape sequences. Incomplete UTF-8 characters at the end of the string will be skipped.

Definition at line 288 of file charset.c.

References latin1_to_gsm, NRP, octstr_append(), octstr_append_char(), octstr_create, octstr_destroy(), octstr_get_char(), octstr_len(), octstr_truncate(), and warning().

Referenced by at2_pdu_encode(), extract_msgdata_part_by_coding(), handle_mt_dcs(), main(), msg_to_emimsg(), msg_to_pdu(), ois_append_sm_text(), packet_encode_message(), and sms_msgdata_len().

289 {
290  long pos, len;
291  int val1, val2;
292  Octstr *newostr;
293 
294  if (ostr == NULL)
295  return;
296 
297  newostr = octstr_create("");
298  len = octstr_len(ostr);
299 
300  for (pos = 0; pos < len; pos++) {
301  val1 = octstr_get_char(ostr, pos);
302 
303  /* check range */
304  if (val1 < 0 || val1 > 255) {
305  warning(0, "Char (0x%02x) in UTF-8 string not in the range (0, 255). Skipped.", val1);
306  continue;
307  }
308 
309  /* Convert UTF-8 to unicode code */
310 
311  /* test if two byte utf8 char */
312  if ((val1 & 0xE0) == 0xC0) {
313  /* test if incomplete utf char */
314  if(pos + 1 < len) {
315  val2 = octstr_get_char(ostr, ++pos);
316  val1 = (((val1 & ~0xC0) << 6) | (val2 & 0x3F));
317  } else {
318  /* incomplete, ignore it */
319  warning(0, "Incomplete UTF-8 char discovered, skipped. 1");
320  pos += 1;
321  continue;
322  }
323  } else if ((val1 & 0xF0) == 0xE0) { /* test for three byte utf8 char */
324  if(pos + 2 < len) {
325  val2 = octstr_get_char(ostr, ++pos);
326  val1 = (((val1 & ~0xE0) << 6) | (val2 & 0x3F));
327  val2 = octstr_get_char(ostr, ++pos);
328  val1 = (val1 << 6) | (val2 & 0x3F);
329  } else {
330  /* incomplete, ignore it */
331  warning(0, "Incomplete UTF-8 char discovered, skipped. 2");
332  pos += 2;
333  continue;
334  }
335  }
336 
337  /* test Latin code page 1 char */
338  if(val1 <= 255) {
339  val1 = latin1_to_gsm[val1];
340  /* needs to be escaped ? */
341  if(val1 < 0) {
342  octstr_append_char(newostr, 27);
343  val1 *= -1;
344  }
345  } else {
346  /* Its not a Latin1 char, test for allowed GSM chars */
347  switch(val1) {
348  case 0x394:
349  val1 = 0x10; /* GREEK CAPITAL LETTER DELTA */
350  break;
351  case 0x3A6:
352  val1 = 0x12; /* GREEK CAPITAL LETTER PHI */
353  break;
354  case 0x393:
355  val1 = 0x13; /* GREEK CAPITAL LETTER GAMMA */
356  break;
357  case 0x39B:
358  val1 = 0x14; /* GREEK CAPITAL LETTER LAMBDA */
359  break;
360  case 0x3A9:
361  val1 = 0x15; /* GREEK CAPITAL LETTER OMEGA */
362  break;
363  case 0x3A0:
364  val1 = 0x16; /* GREEK CAPITAL LETTER PI */
365  break;
366  case 0x3A8:
367  val1 = 0x17; /* GREEK CAPITAL LETTER PSI */
368  break;
369  case 0x3A3:
370  val1 = 0x18; /* GREEK CAPITAL LETTER SIGMA */
371  break;
372  case 0x398:
373  val1 = 0x19; /* GREEK CAPITAL LETTER THETA */
374  break;
375  case 0x39E:
376  val1 = 0x1A; /* GREEK CAPITAL LETTER XI */
377  break;
378  case 0x20AC:
379  val1 = 'e'; /* EURO SIGN */
380  octstr_append_char(newostr, 27);
381  break;
382  default: val1 = NRP; /* character cannot be represented in GSM 03.38 */
383  }
384  }
385  octstr_append_char(newostr, val1);
386  }
387 
388  octstr_truncate(ostr, 0);
389  octstr_append(ostr, newostr);
390  octstr_destroy(newostr);
391 }
void octstr_append(Octstr *ostr1, const Octstr *ostr2)
Definition: octstr.c:1504
void octstr_append_char(Octstr *ostr, int ch)
Definition: octstr.c:1517
static const int latin1_to_gsm[256]
Definition: latin1_to_gsm.h:4
#define NRP
Definition: charset.c:73
void warning(int err, const char *fmt,...)
Definition: log.c:660
void octstr_destroy(Octstr *ostr)
Definition: octstr.c:324
#define octstr_create(cstr)
Definition: octstr.h:125
long octstr_len(const Octstr *ostr)
Definition: octstr.c:342
Definition: octstr.c:118
void octstr_truncate(Octstr *ostr, int new_len)
Definition: octstr.c:1327
int octstr_get_char(const Octstr *ostr, long pos)
Definition: octstr.c:406

Variable Documentation

◆ chars_aliases

alias_t chars_aliases[]
Initial value:
= {
{ "CP1250", "WIN-1250" },
{ "CP1250", "WINDOWS-1250" },
{ "CP1251", "WIN-1251" },
{ "CP1251", "WINDOWS-1251" },
{ "CP1252", "WIN-1252" },
{ "CP1252", "WINDOWS-1252" },
{ "CP1253", "WIN-1253" },
{ "CP1253", "WINDOWS-1253" },
{ "CP1254", "WIN-1254" },
{ "CP1254", "WINDOWS-1254" },
{ "CP1257", "WIN-1257" },
{ "CP1257", "WINDOWS-1257" },
{ NULL }
}

Definition at line 184 of file charset.c.

Referenced by charset_init().

◆ gsm_esctolatin1

const { ... } gsm_esctolatin1[]
Initial value:
= {
{ 10, 12 },
{ 20, '^' },
{ 40, '{' },
{ 41, '}' },
{ 47, '\\' },
{ 60, '[' },
{ 61, '~' },
{ 62, ']' },
{ 64, '|' },
{ 101, 128 },
{ -1, -1 }
}

Referenced by charset_gsm_to_latin1().

◆ gsm_esctouni

const { ... } gsm_esctouni[]
Initial value:
= {
{ 10, 12 },
{ 20, '^' },
{ 40, '{' },
{ 41, '}' },
{ 47, '\\' },
{ 60, '[' },
{ 61, '~' },
{ 62, ']' },
{ 64, '|' },
{ 'e', 0x20AC },
{ -1, -1 }
}

Maps escaped GSM characters to their Unicode code positions.

Referenced by charset_gsm_to_utf8().

◆ gsm_to_latin1

const unsigned char gsm_to_latin1[128]
static
Initial value:
= {
'@', 0xa3, '$', 0xa5, 0xe8, 0xe9, 0xf9, 0xec,
0xf2, 0xc7, 10, 0xd8, 0xf8, 13, 0xc5, 0xe5,
'?', '_', '?', '?', '?', '?', '?', '?',
'?', '?', '?', ' ', 0xc6, 0xe6, 0xdf, 0xc9,
' ', '!', '"', '#', 0xa4, '%', '&', '\'',
'(', ')', '*', '+', ',', '-', '.', '/',
'0', '1', '2', '3', '4', '5', '6', '7',
'8', '9', ':', ';', '<', '=', '>', '?',
0xa1, 'A', 'B', 'C', 'D', 'E', 'F', 'G',
'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
'X', 'Y', 'Z', 0xc4, 0xd6, 0xd1, 0xdc, 0xa7,
0xbf, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
'x', 'y', 'z', 0xe4, 0xf6, 0xf1, 0xfc, 0xe0
}

Definition at line 128 of file charset.c.

Referenced by charset_gsm_to_latin1().

◆ gsm_to_unicode

const int gsm_to_unicode[128]
static
Initial value:
= {
'@', 0xA3, '$', 0xA5, 0xE8, 0xE9, 0xF9, 0xEC,
0xF2, 0xC7, 10, 0xd8, 0xF8, 13, 0xC5, 0xE5,
0x394, '_', 0x3A6, 0x393, 0x39B, 0x3A9, 0x3A0, 0x3A8,
0x3A3, 0x398, 0x39E, NRP, 0xC6, 0xE6, 0xDF, 0xC9,
' ', '!', '"', '#', 0xA4, '%', '&', '\'',
'(', ')', '*', '+', ',', '-', '.', '/',
'0', '1', '2', '3', '4', '5', '6', '7',
'8', '9', ':', ';', '<', '=', '>', '?',
0xA1, 'A', 'B', 'C', 'D', 'E', 'F', 'G',
'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
'X', 'Y', 'Z', 0xC4, 0xD6, 0xD1, 0xDC, 0xA7,
0xBF, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
'x', 'y', 'z', 0xE4, 0xF6, 0xF1, 0xFC, 0xE0
}
#define NRP
Definition: charset.c:73

Maps GSM default alphabet characters to their Unicode code positions. The escape character, at position 27, is mapped to an NRP, though normally the function that indexes into this table will treat it specially.

Definition at line 153 of file charset.c.

Referenced by charset_gsm_to_utf8().

◆ gsmesc

int gsmesc

Definition at line 84 of file charset.c.

Referenced by charset_gsm_to_latin1(), and charset_gsm_to_utf8().

◆ latin1

int latin1

Definition at line 85 of file charset.c.

Referenced by convert_html_entity(), and ws_bc_encode().

◆ unichar

int unichar

Definition at line 106 of file charset.c.

See file LICENSE for details about the license agreement for using, modifying, copying or deriving work from this software.