Kannel: Open Source WAP and SMS gateway  svn-r5335
html.c File Reference
#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include "html.h"
#include "gwlib/gwlib.h"

Go to the source code of this file.

Macros

#define SMS_MAX   161
 

Functions

static int html_comment_begins (Octstr *html, long pos)
 
static void skip_html_comment (Octstr *html, long *pos)
 
static void skip_html_tag (Octstr *html, long *pos)
 
static void convert_html_entity (Octstr *sms, Octstr *html, long *pos)
 
Octstr * html_to_sms (Octstr *html)
 

Macro Definition Documentation

◆ SMS_MAX

#define SMS_MAX   161

Definition at line 71 of file html.c.

Function Documentation

◆ convert_html_entity()

static void convert_html_entity ( Octstr * sms,
Octstr * html,
long *  pos 
)
static

Definition at line 129 of file html.c.

References code, latin1, octstr_append_char(), octstr_get_char(), octstr_get_many_chars(), and octstr_parse_long().

Referenced by html_to_sms().

130 {
131  static struct {
132  char *entity;
133  int latin1;
134  }
135  tab[] = {
136  { "&amp;", '&' },
137  { "&lt;", '<' },
138  { "&gt;", '>' },
139 
140  /* The following is copied from
141 
142  http://www.hut.fi/~jkorpela/HTML3.2/latin1.html
143 
144  by Jukka Korpela. Hand and script edited to form this
145  table. */
146 
147  { "&nbsp;", ' ' },
148  { "&iexcl;", 161 },
149  { "&cent;", 162 },
150  { "&pound;", 163 },
151  { "&curren;", 164 },
152  { "&yen;", 165 },
153  { "&brvbar;", 166 },
154  { "&sect;", 167 },
155  { "&uml;", 168 },
156  { "&copy;", 169 },
157  { "&ordf;", 170 },
158  { "&laquo;", 171 },
159  { "&not;", 172 },
160  { "&shy;", 173 },
161  { "&reg;", 174 },
162  { "&macr;", 175 },
163  { "&deg;", 176 },
164  { "&plusmn;", 177 },
165  { "&sup2;", 178 },
166  { "&sup3;", 179 },
167  { "&acute;", 180 },
168  { "&micro;", 181 },
169  { "&para;", 182 },
170  { "&middot;", 183 },
171  { "&cedil;", 184 },
172  { "&sup1;", 185 },
173  { "&ordm;", 186 },
174  { "&raquo;", 187 },
175  { "&frac14;", 188 },
176  { "&frac12;", 189 },
177  { "&frac34;", 190 },
178  { "&iquest;", 191 },
179  { "&Agrave;", 192 },
180  { "&Aacute;", 193 },
181  { "&Acirc;", 194 },
182  { "&Atilde;", 195 },
183  { "&Auml;", 196 },
184  { "&Aring;", 197 },
185  { "&AElig;", 198 },
186  { "&Ccedil;", 199 },
187  { "&Egrave;", 200 },
188  { "&Eacute;", 201 },
189  { "&Ecirc;", 202 },
190  { "&Euml;", 203 },
191  { "&Igrave;", 204 },
192  { "&Iacute;", 205 },
193  { "&Icirc;", 206 },
194  { "&Iuml;", 207 },
195  { "&ETH;", 208 },
196  { "&Ntilde;", 209 },
197  { "&Ograve;", 210 },
198  { "&Oacute;", 211 },
199  { "&Ocirc;", 212 },
200  { "&Otilde;", 213 },
201  { "&Ouml;", 214 },
202  { "&times;", 215 },
203  { "&Oslash;", 216 },
204  { "&Ugrave;", 217 },
205  { "&Uacute;", 218 },
206  { "&Ucirc;", 219 },
207  { "&Uuml;", 220 },
208  { "&Yacute;", 221 },
209  { "&THORN;", 222 },
210  { "&szlig;", 223 },
211  { "&agrave;", 224 },
212  { "&aacute;", 225 },
213  { "&acirc;", 226 },
214  { "&atilde;", 227 },
215  { "&auml;", 228 },
216  { "&aring;", 229 },
217  { "&aelig;", 230 },
218  { "&ccedil;", 231 },
219  { "&egrave;", 232 },
220  { "&eacute;", 233 },
221  { "&ecirc;", 234 },
222  { "&euml;", 235 },
223  { "&igrave;", 236 },
224  { "&iacute;", 237 },
225  { "&icirc;", 238 },
226  { "&iuml;", 239 },
227  { "&eth;", 240 },
228  { "&ntilde;", 241 },
229  { "&ograve;", 242 },
230  { "&oacute;", 243 },
231  { "&ocirc;", 244 },
232  { "&otilde;", 245 },
233  { "&ouml;", 246 },
234  { "&divide;", 247 },
235  { "&oslash;", 248 },
236  { "&ugrave;", 249 },
237  { "&uacute;", 250 },
238  { "&ucirc;", 251 },
239  { "&uuml;", 252 },
240  { "&yacute;", 253 },
241  { "&thorn;", 254 },
242  { "&yuml;", 255 },
243  };
244  int num_tab = sizeof(tab) / sizeof(tab[0]);
245  long i, code;
246  size_t len;
247  char buf[1024];
248 
249  if (octstr_get_char(html, *pos + 1) == '#') {
250  if (octstr_get_char(html, *pos + 2) == 'x' || octstr_get_char(html, *pos + 2) == 'X')
251  i = octstr_parse_long(&code, html, *pos + 3, 16); /* hex */
252  else
253  i = octstr_parse_long(&code, html, *pos + 2, 10); /* decimal */
254  if (i > 0) {
255  if (code < 256)
256  octstr_append_char(sms, code);
257  *pos = i + 1;
258  if (octstr_get_char(html, *pos) == ';')
259  ++(*pos);
260  } else {
261  ++(*pos);
262  octstr_append_char(sms, '&');
263  }
264  } else {
265  for (i = 0; i < num_tab; ++i) {
266  len = strlen(tab[i].entity);
267  octstr_get_many_chars(buf, html, *pos, len);
268  buf[len] = '\0';
269  if (strcmp(buf, tab[i].entity) == 0) {
270  *pos += len;
271  octstr_append_char(sms, tab[i].latin1);
272  break;
273  }
274  }
275  if (i == num_tab) {
276  ++(*pos);
277  octstr_append_char(sms, '&');
278  }
279  }
280 }
void octstr_append_char(Octstr *ostr, int ch)
Definition: octstr.c:1517
int code
Definition: smsc_cimd2.c:346
int latin1
Definition: charset.c:85
long octstr_parse_long(long *nump, Octstr *ostr, long pos, int base)
Definition: octstr.c:749
void octstr_get_many_chars(char *buf, Octstr *ostr, long pos, long len)
Definition: octstr.c:425
int octstr_get_char(const Octstr *ostr, long pos)
Definition: octstr.c:406

◆ html_comment_begins()

static int html_comment_begins ( Octstr * html,
long  pos 
)
static

Definition at line 75 of file html.c.

References octstr_get_many_chars().

Referenced by html_to_sms().

76 {
77  char buf[10];
78 
79  octstr_get_many_chars(buf, html, pos, 4);
80  buf[5] = '\0';
81  return strcmp(buf, "<!--") == 0;
82 }
void octstr_get_many_chars(char *buf, Octstr *ostr, long pos, long len)
Definition: octstr.c:425

◆ html_to_sms()

Octstr* html_to_sms ( Octstr * html)

Definition at line 283 of file html.c.

References convert_html_entity(), html_comment_begins(), octstr_append_char(), octstr_create, octstr_get_char(), octstr_len(), octstr_shrink_blanks(), octstr_strip_blanks(), skip_html_comment(), and skip_html_tag().

Referenced by smsbox_sendsms_post(), and url_result_thread().

284 {
285  long i, len;
286  int c;
287  Octstr *sms;
288 
289  sms = octstr_create("");
290  len = octstr_len(html);
291  i = 0;
292  while (i < len) {
293  c = octstr_get_char(html, i);
294  switch (c) {
295  case '<':
296  if (html_comment_begins(html, i))
297  skip_html_comment(html, &i);
298  else
299  skip_html_tag(html, &i);
300  break;
301  case '&':
302  convert_html_entity(sms, html, &i);
303  break;
304  default:
305  octstr_append_char(sms, c);
306  ++i;
307  break;
308  }
309  }
311  octstr_strip_blanks(sms);
312  return sms;
313 }
void octstr_append_char(Octstr *ostr, int ch)
Definition: octstr.c:1517
static void skip_html_tag(Octstr *html, long *pos)
Definition: html.c:100
void octstr_strip_blanks(Octstr *text)
Definition: octstr.c:1346
static int html_comment_begins(Octstr *html, long pos)
Definition: html.c:75
#define octstr_create(cstr)
Definition: octstr.h:125
long octstr_len(const Octstr *ostr)
Definition: octstr.c:342
Definition: octstr.c:118
static void convert_html_entity(Octstr *sms, Octstr *html, long *pos)
Definition: html.c:129
int octstr_get_char(const Octstr *ostr, long pos)
Definition: octstr.c:406
void octstr_shrink_blanks(Octstr *text)
Definition: octstr.c:1433
static void skip_html_comment(Octstr *html, long *pos)
Definition: html.c:86

◆ skip_html_comment()

static void skip_html_comment ( Octstr * html,
long *  pos 
)
static

Definition at line 86 of file html.c.

References octstr_imm(), octstr_len(), and octstr_search().

Referenced by html_to_sms().

87 {
88  long i;
89 
90  *pos += 4; /* Skip "<!--" at beginning of comment. */
91  i = octstr_search(html, octstr_imm("-->"), *pos);
92  if (i == -1)
93  *pos = octstr_len(html);
94  else
95  *pos = i;
96 }
long octstr_search(const Octstr *haystack, const Octstr *needle, long pos)
Definition: octstr.c:1070
Octstr * octstr_imm(const char *cstr)
Definition: octstr.c:283
long octstr_len(const Octstr *ostr)
Definition: octstr.c:342

◆ skip_html_tag()

static void skip_html_tag ( Octstr * html,
long *  pos 
)
static

Definition at line 100 of file html.c.

References octstr_get_char(), octstr_len(), and octstr_search_char().

Referenced by html_to_sms().

101 {
102  long i, len;
103  int c;
104 
105  /* Skip leading '<'. */
106  ++(*pos);
107 
108  /* Skip name of tag and attributes with values. */
109  len = octstr_len(html);
110  while (*pos < len && (c = octstr_get_char(html, *pos)) != '>') {
111  if (c == '"' || c == '\'') {
112  i = octstr_search_char(html, c, *pos + 1);
113  if (i == -1)
114  *pos = len;
115  else
116  *pos = i + 1;
117  } else
118  ++(*pos);
119  }
120 
121  /* Skip trailing '>' if it is there. */
122  if (octstr_get_char(html, *pos) == '>')
123  ++(*pos);
124 }
long octstr_search_char(const Octstr *ostr, int ch, long pos)
Definition: octstr.c:1012
long octstr_len(const Octstr *ostr)
Definition: octstr.c:342
int octstr_get_char(const Octstr *ostr, long pos)
Definition: octstr.c:406
See file LICENSE for details about the license agreement for using, modifying, copying or deriving work from this software.