template_utils.c 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385
  1. /*
  2. * gluon-web Template - Utility functions
  3. *
  4. * Copyright (C) 2010 Jo-Philipp Wich <jow@openwrt.org>
  5. * Copyright (C) 2018 Matthias Schiffer <mschiffer@universe-factory.net>
  6. *
  7. * Licensed under the Apache License, Version 2.0 (the "License");
  8. * you may not use this file except in compliance with the License.
  9. * You may obtain a copy of the License at
  10. *
  11. * http://www.apache.org/licenses/LICENSE-2.0
  12. *
  13. * Unless required by applicable law or agreed to in writing, software
  14. * distributed under the License is distributed on an "AS IS" BASIS,
  15. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16. * See the License for the specific language governing permissions and
  17. * limitations under the License.
  18. */
  19. #include "template_utils.h"
  20. #include "template_lmo.h"
  21. /* initialize a buffer object */
  22. struct template_buffer * buf_init(int size)
  23. {
  24. struct template_buffer *buf;
  25. if (size <= 0)
  26. size = 1024;
  27. buf = malloc(sizeof(*buf));
  28. if (buf != NULL)
  29. {
  30. buf->fill = 0;
  31. buf->size = size;
  32. buf->data = malloc(buf->size);
  33. if (buf->data != NULL)
  34. {
  35. buf->dptr = buf->data;
  36. buf->data[0] = 0;
  37. return buf;
  38. }
  39. free(buf);
  40. }
  41. return NULL;
  42. }
  43. /* grow buffer */
  44. static int buf_grow(struct template_buffer *buf, int size)
  45. {
  46. unsigned int off = (buf->dptr - buf->data);
  47. char *data;
  48. if (size <= 0)
  49. size = 1024;
  50. data = realloc(buf->data, buf->size + size);
  51. if (data != NULL)
  52. {
  53. buf->data = data;
  54. buf->dptr = data + off;
  55. buf->size += size;
  56. return buf->size;
  57. }
  58. return 0;
  59. }
  60. /* put one char into buffer object */
  61. static int buf_putchar(struct template_buffer *buf, char c)
  62. {
  63. if( ((buf->fill + 1) >= buf->size) && !buf_grow(buf, 0) )
  64. return 0;
  65. *(buf->dptr++) = c;
  66. *(buf->dptr) = 0;
  67. buf->fill++;
  68. return 1;
  69. }
  70. /* append data to buffer */
  71. int buf_append(struct template_buffer *buf, const char *s, int len)
  72. {
  73. if ((buf->fill + len + 1) >= buf->size)
  74. {
  75. if (!buf_grow(buf, len + 1))
  76. return 0;
  77. }
  78. memcpy(buf->dptr, s, len);
  79. buf->fill += len;
  80. buf->dptr += len;
  81. *(buf->dptr) = 0;
  82. return len;
  83. }
  84. /* destroy buffer object and return pointer to data */
  85. char * buf_destroy(struct template_buffer *buf)
  86. {
  87. char *data = buf->data;
  88. free(buf);
  89. return data;
  90. }
  /*
   * Derive the total length in bytes (lead byte included) of the UTF-8
   * sequence announced by the lead byte c.
   *
   *   0xC0..0xDF -> 2      0xF0..0xF7 -> 4      0xFC..0xFD -> 6
   *   0xE0..0xEF -> 3      0xF8..0xFB -> 5
   *
   * Every other byte value (ASCII, continuation bytes, 0xFE, 0xFF) yields 1.
   */
  static inline int mb_num_chars(unsigned char c)
  {
      if (c < 0xC0)
          return 1;
      if (c < 0xE0)
          return 2;
      if (c < 0xF0)
          return 3;
      if (c < 0xF8)
          return 4;
      if (c < 0xFC)
          return 5;
      if (c < 0xFE)
          return 6;

      return 1;
  }
  /*
   * Test whether c is a UTF-8 continuation byte, i.e. has the bit pattern
   * 10xxxxxx (0x80..0xBF). Returns 1 if so, 0 otherwise.
   */
  static inline int mb_is_cont(unsigned char c)
  {
      return (c & 0xC0) == 0x80;
  }
  /*
   * Test whether the n-byte sequence at s is the shortest possible
   * representation of its code point, i.e. that it is not an "overlong"
   * encoding.
   *
   * For each length the overlong forms are recognised by their fixed bit
   * pattern: the lead byte carries no payload bits, the first continuation
   * byte has its high payload bits cleared, and all following bytes are
   * continuation bytes (10xxxxxx). At most n bytes are read from s.
   *
   * Returns 0 if the sequence matches the overlong pattern for its length,
   * 1 otherwise (including any n outside 2..6).
   */
  static inline int mb_is_shortest(unsigned char *s, int n)
  {
      switch (n)
      {
          case 2:
              /* 1100000x (10xxxxxx) */
              return !(((s[0] >> 1) == 0x60) &&
                       ((s[1] >> 6) == 0x02));

          case 3:
              /* 11100000 100xxxxx (10xxxxxx) */
              return !((s[0] == 0xE0) &&
                       ((s[1] >> 5) == 0x04) &&
                       ((s[2] >> 6) == 0x02));

          case 4:
              /* 11110000 1000xxxx (10xxxxxx 10xxxxxx) */
              return !((s[0] == 0xF0) &&
                       ((s[1] >> 4) == 0x08) &&
                       ((s[2] >> 6) == 0x02) &&
                       ((s[3] >> 6) == 0x02));

          case 5:
              /* 11111000 10000xxx (10xxxxxx 10xxxxxx 10xxxxxx) */
              return !((s[0] == 0xF8) &&
                       ((s[1] >> 3) == 0x10) &&
                       ((s[2] >> 6) == 0x02) &&
                       ((s[3] >> 6) == 0x02) &&
                       ((s[4] >> 6) == 0x02));

          case 6:
              /* 11111100 100000xx (10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx)
               * the lead byte of a six byte sequence is 0xFC, not 0xF8 */
              return !((s[0] == 0xFC) &&
                       ((s[1] >> 2) == 0x20) &&
                       ((s[2] >> 6) == 0x02) &&
                       ((s[3] >> 6) == 0x02) &&
                       ((s[4] >> 6) == 0x02) &&
                       ((s[5] >> 6) == 0x02));

          default:
              /* single bytes and unknown lengths cannot be overlong */
              return 1;
      }
  }
  /*
   * Test whether the n-byte sequence at s encodes a UTF-16 surrogate
   * (U+D800..U+DFFF): a three byte sequence whose lead byte is 0xED and
   * whose second byte lies in 0xA0..0xBF.
   *
   * Returns 1 for a surrogate, 0 otherwise. The third byte is not inspected.
   */
  static inline int mb_is_surrogate(unsigned char *s, int n)
  {
      if (n != 3)
          return 0;

      if (s[0] != 0xED)
          return 0;

      /* 101xxxxx covers exactly 0xA0..0xBF */
      return (s[1] & 0xE0) == 0xA0;
  }
  /*
   * Test whether the n-byte sequence at s is one of the two rejected three
   * byte sequences EF BF BE (U+FFFE) and EF BF BF (U+FFFF).
   *
   * Returns 1 for such a sequence, 0 otherwise.
   */
  static inline int mb_is_illegal(unsigned char *s, int n)
  {
      if (n != 3)
          return 0;

      if (s[0] != 0xEF)
          return 0;

      if (s[1] != 0xBF)
          return 0;

      return (s[2] == 0xBE) || (s[2] == 0xBF);
  }
  /*
   * Validate the single UTF-8 sequence at *s and store the result in buf.
   *
   * s    in/out: points to the first byte to examine; on success it is
   *      advanced past the consumed bytes
   * l    number of readable bytes at *s; the first byte is read
   *      unconditionally, so l must be at least 1
   * buf  destination buffer
   *
   * A valid sequence is copied verbatim. Anything else (a NUL byte, a stray
   * continuation byte, a truncated, overlong, surrogate or otherwise
   * rejected sequence, and every five or six byte sequence) is replaced by
   * a single '?'.
   *
   * Returns the number of input bytes consumed, or 0 if writing to buf
   * failed (in which case *s is left unchanged).
   */
  static int validate_utf8(unsigned char **s, unsigned int l, struct template_buffer *buf)
  {
      unsigned char *ptr = *s;
      unsigned int o = 0, v, n;

      /* ascii byte without null */
      if ((ptr[0] >= 0x01) && (ptr[0] <= 0x7F))
      {
          if (!buf_putchar(buf, *ptr++))
              return 0;

          o = 1;
      }

      /* multi byte sequence */
      else if ((n = mb_num_chars(*ptr)) > 1)
      {
          /* count the lead byte plus the continuation bytes that follow it.
           * The scan stops at the expected length n so that a byte after a
           * complete sequence is never swallowed with it, and at l so that
           * nothing past the end of the input is read */
          for (v = 1; (v < n) && (v < l) && mb_is_cont(ptr[v]); v++)
              ;

          switch (n)
          {
              case 6:
              case 5:
                  /* five and six byte sequences are always invalid */
                  if (!buf_putchar(buf, '?'))
                      return 0;

                  break;

              default:
                  /* if the number of bytes found matches the expected
                   * length and the sequence is legal, copy the bytes to
                   * the destination buffer */
                  if ((v == n) && mb_is_shortest(ptr, n) &&
                      !mb_is_surrogate(ptr, n) && !mb_is_illegal(ptr, n))
                  {
                      /* copy sequence */
                      if (!buf_append(buf, (char *)ptr, n))
                          return 0;
                  }

                  /* the found sequence is illegal, replace it */
                  else
                  {
                      if (!buf_putchar(buf, '?'))
                          return 0;
                  }

                  break;
          }

          /* advance beyond the last byte that belongs to this sequence */
          o = v;
          ptr += v;
      }

      /* invalid byte (0x00, stray continuation byte, 0xFE, 0xFF) */
      else
      {
          if (!buf_putchar(buf, '?')) /* or 0xEF, 0xBF, 0xBD */
              return 0;

          o = 1;
          ptr++;
      }

      *s = ptr;
      return o;
  }
  /*
   * Sanitize the l bytes at s for use as XML character data.
   *
   *  - bytes that are invalid in XML (0x00..0x08, 0x0B, 0x0C, 0x0E..0x1F,
   *    0x7F) are dropped
   *  - the characters & ' " < > are replaced by numeric character
   *    references such as "&#38;"
   *  - all other ASCII bytes are copied unchanged
   *  - bytes >= 0x80 are passed through validate_utf8(), which copies valid
   *    UTF-8 sequences and replaces invalid ones
   *
   * Returns the NUL-terminated data area obtained from buf_destroy(), or
   * NULL if the buffer could not be created. If writing to the buffer fails
   * part-way, processing stops and the output collected so far is returned.
   */
  char * pcdata(const char *s, unsigned int l)
  {
      struct template_buffer *buf = buf_init(l);
      unsigned char *ptr = (unsigned char *)s;
      unsigned int o, v;
      char esq[8];
      int esl;

      if (!buf)
          return NULL;

      for (o = 0; o < l; o++)
      {
          /* Invalid XML bytes */
          if ((*ptr <= 0x08) ||
              ((*ptr >= 0x0B) && (*ptr <= 0x0C)) ||
              ((*ptr >= 0x0E) && (*ptr <= 0x1F)) ||
              (*ptr == 0x7F))
          {
              ptr++;
          }

          /* Escapes */
          else if ((*ptr == 0x26) ||
                   (*ptr == 0x27) ||
                   (*ptr == 0x22) ||
                   (*ptr == 0x3C) ||
                   (*ptr == 0x3E))
          {
              esl = snprintf(esq, sizeof(esq), "&#%i;", *ptr);

              /* never hand an error or truncated result to buf_append() */
              if ((esl <= 0) || (esl >= (int)sizeof(esq)))
                  break;

              if (!buf_append(buf, esq, esl))
                  break;

              ptr++;
          }

          /* ascii char */
          else if (*ptr <= 0x7F)
          {
              /* stop on a failed write, like every other branch does */
              if (!buf_putchar(buf, (char)*ptr))
                  break;

              ptr++;
          }

          /* multi byte sequence */
          else
          {
              if (!(v = validate_utf8(&ptr, l - o, buf)))
                  break;

              /* the loop increment accounts for one of the consumed bytes */
              o += (v - 1);
          }
      }

      return buf_destroy(buf);
  }
  /*
   * Append the l bytes at s to out, escaped for use inside a double-quoted
   * Lua string literal.
   *
   *  - backslash becomes \\ and newline becomes \n
   *  - a double quote becomes &#34; when escape_xml is set, \" otherwise
   *  - when escape_xml is set, ' & < > become numeric character references
   *  - every other byte is copied unchanged
   *
   * Results of the buffer writes are not checked.
   */
  void luastr_escape(struct template_buffer *out, const char *s, unsigned int l, int escape_xml)
  {
      unsigned int i;
      char esq[8];
      int esl;

      for (i = 0; i < l; i++)
      {
          const char ch = s[i];

          if (ch == '\\')
          {
              buf_append(out, "\\\\", 2);
              continue;
          }

          if (ch == '"')
          {
              if (escape_xml)
                  buf_append(out, "&#34;", 5);
              else
                  buf_append(out, "\\\"", 2);

              continue;
          }

          if (ch == '\n')
          {
              buf_append(out, "\\n", 2);
              continue;
          }

          /* XML special characters are only rewritten on request; without
           * escape_xml they are copied like any ordinary byte below */
          if (escape_xml &&
              ((ch == '\'') || (ch == '&') || (ch == '<') || (ch == '>')))
          {
              esl = snprintf(esq, sizeof(esq), "&#%i;", ch);
              buf_append(out, esq, esl);
              continue;
          }

          buf_putchar(out, ch);
      }
  }
  /*
   * Look up the l bytes at s with lmo_translate() and append the escaped
   * result to out.
   *
   * When lmo_translate() returns zero, the text it stored through its
   * output arguments is escaped; otherwise the original string is escaped
   * unchanged. escape_xml is passed through to luastr_escape().
   */
  void luastr_translate(struct template_buffer *out, const char *s, unsigned int l, int escape_xml)
  {
      char *translated;
      int translated_len;

      if (lmo_translate(s, l, &translated, &translated_len))
      {
          /* non-zero result: fall back to the untranslated text */
          luastr_escape(out, s, l, escape_xml);
          return;
      }

      luastr_escape(out, translated, translated_len, escape_xml);
  }