template_utils.c 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366
  1. /*
  2. * gluon-web Template - Utility functions
  3. *
  4. * Copyright (C) 2010 Jo-Philipp Wich <jow@openwrt.org>
  5. * Copyright (C) 2018 Matthias Schiffer <mschiffer@universe-factory.net>
  6. *
  7. * Licensed under the Apache License, Version 2.0 (the "License");
  8. * you may not use this file except in compliance with the License.
  9. * You may obtain a copy of the License at
  10. *
  11. * http://www.apache.org/licenses/LICENSE-2.0
  12. *
  13. * Unless required by applicable law or agreed to in writing, software
  14. * distributed under the License is distributed on an "AS IS" BASIS,
  15. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16. * See the License for the specific language governing permissions and
  17. * limitations under the License.
  18. */
  19. #include "template_utils.h"
  20. #include "template_lmo.h"
  21. #include <stdlib.h>
  22. #include <stdio.h>
  23. #include <string.h>
  24. /* initialize a buffer object */
  25. struct template_buffer * buf_init(size_t size)
  26. {
  27. struct template_buffer *buf = malloc(sizeof(*buf));
  28. if (buf != NULL) {
  29. buf->size = size;
  30. buf->data = malloc(buf->size);
  31. buf->dptr = buf->data;
  32. if (buf->data != NULL || size == 0)
  33. return buf;
  34. free(buf);
  35. }
  36. return NULL;
  37. }
  38. /* grow buffer */
  39. static bool buf_grow(struct template_buffer *buf, size_t len)
  40. {
  41. size_t off = buf->dptr - buf->data, left = buf->size - off;
  42. if (len <= left)
  43. return true;
  44. size_t diff = len - left;
  45. if (diff < 1024)
  46. diff = 1024;
  47. char *data = realloc(buf->data, buf->size + diff);
  48. if (data == NULL)
  49. return false;
  50. buf->data = data;
  51. buf->dptr = data + off;
  52. buf->size += diff;
  53. return true;
  54. }
  55. /* put one char into buffer object */
  56. static bool buf_putchar(struct template_buffer *buf, char c)
  57. {
  58. if (!buf_grow(buf, 1))
  59. return false;
  60. *(buf->dptr++) = c;
  61. return true;
  62. }
  63. /* append data to buffer */
  64. bool buf_append(struct template_buffer *buf, const char *s, size_t len)
  65. {
  66. if (!buf_grow(buf, len))
  67. return false;
  68. memcpy(buf->dptr, s, len);
  69. buf->dptr += len;
  70. return true;
  71. }
  72. /* destroy buffer object and return pointer to data */
  73. char * buf_destroy(struct template_buffer *buf)
  74. {
  75. char *data = buf->data;
  76. free(buf);
  77. return data;
  78. }
  /*
   * Derive the total length (lead byte included) of the multi byte sequence
   * announced by lead byte `c`.
   *
   * The length is the number of consecutive one-bits at the top of the byte:
   * 110xxxxx -> 2, 1110xxxx -> 3, ... 1111110x -> 6. Every other byte
   * (plain ASCII, a continuation byte, 0xFE, 0xFF) yields 1.
   */
  static inline size_t mb_num_chars(unsigned char c)
  {
      size_t ones = 0;

      /* count the leading one-bits, most significant bit first */
      for (unsigned char bit = 0x80; bit & c; bit >>= 1)
          ones++;

      return (ones >= 2 && ones <= 6) ? ones : 1;
  }
  /*
   * Tell whether `c` is a UTF-8 continuation byte, i.e. has the bit pattern
   * 10xxxxxx (0x80..0xBF).
   */
  static inline bool mb_is_cont(unsigned char c)
  {
      return (c & 0xC0) == 0x80;
  }
  /*
   * Test whether the byte sequence of length `n` at `s` is the shortest
   * possible representation of its code point.
   *
   * For each sequence length the over-long form is recognised by its fixed
   * bit pattern: a lead byte carrying no payload bits, followed by a second
   * byte whose upper payload bits are all zero. Returns false for such an
   * over-long form and true otherwise, including for lengths outside 2..6.
   *
   * The caller must guarantee that `n` bytes are readable at `s` for
   * n in 2..6; bytes are only inspected as far as the preceding ones match.
   */
  static inline bool mb_is_shortest(const unsigned char *s, size_t n)
  {
      switch (n)
      {
      case 2:
          /* 1100000x (10xxxxxx) */
          return !(((s[0] >> 1) == 0x60) &&
                   ((s[1] >> 6) == 0x02));

      case 3:
          /* 11100000 100xxxxx (10xxxxxx) */
          return !((s[0] == 0xE0) &&
                   ((s[1] >> 5) == 0x04) &&
                   ((s[2] >> 6) == 0x02));

      case 4:
          /* 11110000 1000xxxx (10xxxxxx 10xxxxxx) */
          return !((s[0] == 0xF0) &&
                   ((s[1] >> 4) == 0x08) &&
                   ((s[2] >> 6) == 0x02) &&
                   ((s[3] >> 6) == 0x02));

      case 5:
          /* 11111000 10000xxx (10xxxxxx 10xxxxxx 10xxxxxx) */
          return !((s[0] == 0xF8) &&
                   ((s[1] >> 3) == 0x10) &&
                   ((s[2] >> 6) == 0x02) &&
                   ((s[3] >> 6) == 0x02) &&
                   ((s[4] >> 6) == 0x02));

      case 6:
          /* 11111100 100000xx (10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx);
           * the lead byte of a six byte sequence is 0xFC, not 0xF8 */
          return !((s[0] == 0xFC) &&
                   ((s[1] >> 2) == 0x20) &&
                   ((s[2] >> 6) == 0x02) &&
                   ((s[3] >> 6) == 0x02) &&
                   ((s[4] >> 6) == 0x02) &&
                   ((s[5] >> 6) == 0x02));

      default:
          return true;
      }
  }
  /*
   * Tell whether the sequence of length `n` at `s` encodes a UTF-16
   * surrogate: a three byte sequence starting with 0xED whose second byte
   * lies in 0xA0..0xBF (bit pattern 101xxxxx).
   *
   * The second byte is only read when the length and lead byte match.
   */
  static inline bool mb_is_surrogate(const unsigned char *s, size_t n)
  {
      if (n != 3 || s[0] != 0xED)
          return false;

      return (s[1] & 0xE0) == 0xA0;
  }
  /*
   * Tell whether the sequence of length `n` at `s` is one of the two three
   * byte sequences EF BF BE or EF BF BF (the encodings of U+FFFE and
   * U+FFFF), which this module treats as illegal.
   *
   * Later bytes are only read when the length and earlier bytes match.
   */
  static inline bool mb_is_illegal(const unsigned char *s, size_t n)
  {
      if (n != 3)
          return false;

      if (s[0] != 0xEF || s[1] != 0xBF)
          return false;

      return s[2] == 0xBE || s[2] == 0xBF;
  }
  /* Scan the source string at *s, validate one UTF-8 sequence and store the
   * result in the given buffer object.
   *
   * s:   in/out cursor; advanced past the consumed bytes on success
   * l:   number of bytes available starting at *s
   * buf: output buffer; receives either the validated bytes or a single '?'
   *      as replacement for an invalid byte or sequence
   *
   * Returns the number of input bytes consumed (at least 1), or 0 if writing
   * to the buffer failed, in which case *s is left untouched. */
  static size_t validate_utf8(const unsigned char **s, size_t l, struct template_buffer *buf)
  {
      const unsigned char *ptr = *s;
      size_t o = 0, v, n;
      /* ascii byte without null */
      if ((*(ptr+0) >= 0x01) && (*(ptr+0) <= 0x7F)) {
          if (!buf_putchar(buf, *ptr++))
              return 0;
          o = 1;
      }
      /* multi byte sequence */
      else if ((n = mb_num_chars(*ptr)) > 1) {
          /* count valid chars: v ends up as 1 (the lead byte) plus the number
           * of continuation bytes found; the (o+v) < l test keeps the scan
           * inside the available input.
           * NOTE(review): because the bound is v <= n, a complete sequence
           * that is directly followed by one more continuation byte yields
           * v == n + 1 and is then replaced by '?' as a whole - confirm
           * this is intended. */
          for (v = 1; (v <= n) && ((o+v) < l) && mb_is_cont(*(ptr+v)); v++);
          switch (n)
          {
          case 6:
          case 5:
              /* five and six byte sequences are always invalid */
              if (!buf_putchar(buf, '?'))
                  return 0;
              break;
          default:
              /* if the number of valid continuation bytes matches the
               * expected number and if the sequence is legal, copy
               * the bytes to the destination buffer */
              if ((v == n) && mb_is_shortest(ptr, n) &&
                  !mb_is_surrogate(ptr, n) && !mb_is_illegal(ptr, n))
              {
                  /* copy sequence */
                  if (!buf_append(buf, (const char *)ptr, n))
                      return 0;
              }
              /* the found sequence is illegal, skip it */
              else
              {
                  /* invalid sequence */
                  if (!buf_putchar(buf, '?'))
                      return 0;
              }
              break;
          }
          /* advance beyond the last found valid continuation char */
          o = v;
          ptr += v;
      }
      /* invalid byte: 0x00, or any byte >= 0x80 for which mb_num_chars()
       * reports a length of 1 (stray continuation byte, 0xFE, 0xFF) */
      else {
          if (!buf_putchar(buf, '?')) /* or 0xEF, 0xBF, 0xBD */
              return 0;
          o = 1;
          ptr++;
      }
      /* publish the new cursor position only after the output succeeded */
      *s = ptr;
      return o;
  }
  /* Sanitize given string and strip all invalid XML bytes
   * Validate UTF-8 sequences
   * Escape XML control chars
   *
   * s:    input bytes
   * l:    number of input bytes
   * outl: receives the number of bytes in the returned data
   *
   * Control bytes other than TAB, LF and CR (and 0x7F) are dropped; the
   * characters & ' " < > are written as numeric character references
   * ("&#NN;"); other ASCII bytes are copied; bytes >= 0x80 are passed
   * through validate_utf8().
   *
   * Returns the data area of the output buffer, or NULL if the buffer could
   * not be created (*outl is not written in that case). No terminating NUL
   * byte is appended here. If the buffer cannot be grown while processing,
   * the loop stops and the output produced so far is returned. The returned
   * memory comes from malloc()/realloc(); presumably the caller releases it
   * with free(). */
  char * pcdata(const char *s, size_t l, size_t *outl)
  {
      struct template_buffer *buf = buf_init(l);
      const unsigned char *ptr = (const unsigned char *)s;
      size_t o, v;
      char esq[8];
      int esl;

      if (!buf)
          return NULL;

      for (o = 0; o < l; o++)
      {
          /* Invalid XML bytes */
          if ((*ptr <= 0x08) ||
              ((*ptr >= 0x0B) && (*ptr <= 0x0C)) ||
              ((*ptr >= 0x0E) && (*ptr <= 0x1F)) ||
              (*ptr == 0x7F))
          {
              ptr++;
          }
          /* Escapes */
          else if ((*ptr == 0x26) ||
                   (*ptr == 0x27) ||
                   (*ptr == 0x22) ||
                   (*ptr == 0x3C) ||
                   (*ptr == 0x3E))
          {
              esl = snprintf(esq, sizeof(esq), "&#%i;", *ptr);
              if (!buf_append(buf, esq, esl))
                  break;
              ptr++;
          }
          /* ascii char */
          else if (*ptr <= 0x7F)
          {
              /* stop on allocation failure, like the other branches do,
               * instead of silently dropping the byte */
              if (!buf_putchar(buf, (char)*ptr))
                  break;
              ptr++;
          }
          /* multi byte sequence */
          else
          {
              if (!(v = validate_utf8(&ptr, l - o, buf)))
                  break;
              /* the loop header adds the remaining 1 */
              o += (v - 1);
          }
      }

      *outl = buf_length(buf);
      return buf_destroy(buf);
  }
  /*
   * Append the `l` bytes at `s` to `out`, escaped for use inside a double
   * quoted Lua string literal:
   *
   *   backslash -> \\        newline -> \n
   *   "         -> \"        (or &#34; when escape_xml is set)
   *
   * When escape_xml is set, the characters ' & < > are additionally written
   * as numeric character references ("&#NN;"). Every other byte is copied
   * unchanged. Results of the buffer writes are not checked.
   */
  void luastr_escape(struct template_buffer *out, const char *s, size_t l, bool escape_xml)
  {
      const char *end = s + l;

      for (const char *p = s; p < end; p++)
      {
          const char c = *p;

          if (c == '\\') {
              buf_append(out, "\\\\", 2);
          }
          else if (c == '\n') {
              buf_append(out, "\\n", 2);
          }
          else if (c == '"') {
              if (escape_xml)
                  buf_append(out, "&#34;", 5);
              else
                  buf_append(out, "\\\"", 2);
          }
          else if (escape_xml &&
                   (c == '\'' || c == '&' || c == '<' || c == '>')) {
              char ref[8];
              int reflen = snprintf(ref, sizeof(ref), "&#%i;", c);

              buf_append(out, ref, reflen);
          }
          else {
              /* without XML escaping, ' & < > are plain characters too */
              buf_putchar(out, c);
          }
      }
  }
  /*
   * Look up the `l` bytes at `s` via lmo_translate() and append the result
   * to `out` through luastr_escape().
   *
   * A zero return from lmo_translate() is treated as "translation found" and
   * the translated text is emitted; on any other return value the original
   * text is emitted instead. escape_xml is passed through unchanged.
   */
  void luastr_translate(struct template_buffer *out, const char *s, size_t l, bool escape_xml)
  {
      char *translated;
      size_t translated_len;

      if (lmo_translate(s, l, &translated, &translated_len)) {
          /* no translation available: fall back to the source text */
          luastr_escape(out, s, l, escape_xml);
          return;
      }

      luastr_escape(out, translated, translated_len, escape_xml);
  }