multipart_parser.c 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306
  /* Based on node-formidable by Felix Geisendörfer
   * Igor Afonov - afonov@gmail.com - 2012
   * MIT License - http://www.opensource.org/licenses/mit-license.php
   */
  5. #include "multipart_parser.h"
  6. #include <stdio.h>
  7. #include <stdarg.h>
  8. #include <string.h>
  /*
   * Debug trace helper: writes a printf-style message to stderr, prefixed
   * with a fixed tag and a source position. Compiles to a no-op unless
   * DEBUG_MULTIPART is defined.
   *
   * Note: __FILE__ and __LINE__ are expanded inside this helper, so the
   * position printed is that of the fprintf below, not of the caller.
   */
  static void multipart_log(const char * format, ...)
  {
  #ifdef DEBUG_MULTIPART
      va_list args;

      va_start(args, format);
      fprintf(stderr, "[HTTP_MULTIPART_PARSER] %s:%d: ", __FILE__, __LINE__);
      vfprintf(stderr, format, args);
      fprintf(stderr, "\n");
      va_end(args);   /* each va_start must be matched by va_end before returning */
  #else
      (void) format;  /* keep release builds free of unused-parameter warnings */
  #endif
  }
  /*
   * NOTIFY_CB(FOR): fire the optional p->settings->on_<FOR>(p) callback.
   * Relies on `p` (the parser) and `i` (current offset) being in scope at the
   * expansion site. If the callback is set and returns non-zero, the enclosing
   * function returns `i`.
   */
  #define NOTIFY_CB(FOR)                                      \
      do {                                                    \
          if (p->settings->on_##FOR                           \
                  && p->settings->on_##FOR(p) != 0) {         \
              return i;                                       \
          }                                                   \
      } while (0)
  /*
   * EMIT_DATA_CB(FOR, ptr, len): fire the optional
   * p->settings->on_<FOR>(p, ptr, len) callback with a span of data.
   * Relies on `p` (the parser) and `i` (current offset) being in scope at the
   * expansion site. If the callback is set and returns non-zero, the enclosing
   * function returns `i`.
   */
  #define EMIT_DATA_CB(FOR, ptr, len)                             \
      do {                                                        \
          if (p->settings->on_##FOR                               \
                  && p->settings->on_##FOR(p, ptr, len) != 0) {   \
              return i;                                           \
          }                                                       \
      } while (0)
  /* ASCII codes of the two bytes that form the CRLF line terminator. */
  #define LF 0x0A  /* line feed */
  #define CR 0x0D  /* carriage return */
  37. struct multipart_parser {
  38. void * data;
  39. size_t index;
  40. size_t boundary_length;
  41. unsigned char state;
  42. const multipart_parser_settings* settings;
  43. char* lookbehind;
  44. char multipart_boundary[1];
  45. };
  /* States of the multipart parser state machine (stored in multipart_parser.state). */
  enum state {
      s_uninitialized             = 1,
      s_start                     = 2,
      s_start_boundary            = 3,
      s_header_field_start        = 4,
      s_header_field              = 5,
      s_headers_almost_done       = 6,
      s_header_value_start        = 7,
      s_header_value              = 8,
      s_header_value_almost_done  = 9,
      s_part_data_start           = 10,
      s_part_data                 = 11,
      s_part_data_almost_boundary = 12,
      s_part_data_boundary        = 13,
      s_part_data_almost_end      = 14,
      s_part_data_end             = 15,
      s_part_data_final_hyphen    = 16,
      s_end                       = 17
  };
  65. multipart_parser* multipart_parser_init
  66. (const char *boundary, const multipart_parser_settings* settings) {
  67. multipart_parser* p = malloc(sizeof(multipart_parser) +
  68. strlen(boundary) +
  69. strlen(boundary) + 9);
  70. strcpy(p->multipart_boundary, boundary);
  71. p->boundary_length = strlen(boundary);
  72. p->lookbehind = (p->multipart_boundary + p->boundary_length + 1);
  73. p->index = 0;
  74. p->state = s_start;
  75. p->settings = settings;
  76. return p;
  77. }
  78. void multipart_parser_free(multipart_parser* p) {
  79. free(p);
  80. }
  81. void multipart_parser_set_data(multipart_parser *p, void *data) {
  82. p->data = data;
  83. }
  84. void *multipart_parser_get_data(multipart_parser *p) {
  85. return p->data;
  86. }
  87. size_t multipart_parser_execute(multipart_parser* p, const char *buf, size_t len) {
  88. size_t i = 0;
  89. size_t mark = 0;
  90. char c, cl;
  91. int is_last = 0;
  92. while(i < len) {
  93. c = buf[i];
  94. is_last = (i == (len - 1));
  95. switch (p->state) {
  96. case s_start:
  97. multipart_log("s_start");
  98. p->index = 0;
  99. p->state = s_start_boundary;
  100. /* fallthrough */
  101. case s_start_boundary:
  102. multipart_log("s_start_boundary");
  103. if (p->index == p->boundary_length) {
  104. if (c != CR) {
  105. return i;
  106. }
  107. p->index++;
  108. break;
  109. } else if (p->index == (p->boundary_length + 1)) {
  110. if (c != LF) {
  111. return i;
  112. }
  113. p->index = 0;
  114. NOTIFY_CB(part_data_begin);
  115. p->state = s_header_field_start;
  116. break;
  117. }
  118. if (c != p->multipart_boundary[p->index]) {
  119. return i;
  120. }
  121. p->index++;
  122. break;
  123. case s_header_field_start:
  124. multipart_log("s_header_field_start");
  125. mark = i;
  126. p->state = s_header_field;
  127. /* fallthrough */
  128. case s_header_field:
  129. multipart_log("s_header_field");
  130. if (c == CR) {
  131. p->state = s_headers_almost_done;
  132. break;
  133. }
  134. if (c == ':') {
  135. EMIT_DATA_CB(header_field, buf + mark, i - mark);
  136. p->state = s_header_value_start;
  137. break;
  138. }
  139. cl = tolower(c);
  140. if ((c != '-') && (cl < 'a' || cl > 'z')) {
  141. multipart_log("invalid character in header name");
  142. return i;
  143. }
  144. if (is_last)
  145. EMIT_DATA_CB(header_field, buf + mark, (i - mark) + 1);
  146. break;
  147. case s_headers_almost_done:
  148. multipart_log("s_headers_almost_done");
  149. if (c != LF) {
  150. return i;
  151. }
  152. p->state = s_part_data_start;
  153. break;
  154. case s_header_value_start:
  155. multipart_log("s_header_value_start");
  156. if (c == ' ') {
  157. break;
  158. }
  159. mark = i;
  160. p->state = s_header_value;
  161. /* fallthrough */
  162. case s_header_value:
  163. multipart_log("s_header_value");
  164. if (c == CR) {
  165. EMIT_DATA_CB(header_value, buf + mark, i - mark);
  166. p->state = s_header_value_almost_done;
  167. break;
  168. }
  169. if (is_last)
  170. EMIT_DATA_CB(header_value, buf + mark, (i - mark) + 1);
  171. break;
  172. case s_header_value_almost_done:
  173. multipart_log("s_header_value_almost_done");
  174. if (c != LF) {
  175. return i;
  176. }
  177. p->state = s_header_field_start;
  178. break;
  179. case s_part_data_start:
  180. multipart_log("s_part_data_start");
  181. NOTIFY_CB(headers_complete);
  182. mark = i;
  183. p->state = s_part_data;
  184. /* fallthrough */
  185. case s_part_data:
  186. multipart_log("s_part_data");
  187. if (c == CR) {
  188. EMIT_DATA_CB(part_data, buf + mark, i - mark);
  189. mark = i;
  190. p->state = s_part_data_almost_boundary;
  191. p->lookbehind[0] = CR;
  192. break;
  193. }
  194. if (is_last)
  195. EMIT_DATA_CB(part_data, buf + mark, (i - mark) + 1);
  196. break;
  197. case s_part_data_almost_boundary:
  198. multipart_log("s_part_data_almost_boundary");
  199. if (c == LF) {
  200. p->state = s_part_data_boundary;
  201. p->lookbehind[1] = LF;
  202. p->index = 0;
  203. break;
  204. }
  205. EMIT_DATA_CB(part_data, p->lookbehind, 1);
  206. p->state = s_part_data;
  207. mark = i --;
  208. break;
  209. case s_part_data_boundary:
  210. multipart_log("s_part_data_boundary");
  211. if (p->multipart_boundary[p->index] != c) {
  212. EMIT_DATA_CB(part_data, p->lookbehind, 2 + p->index);
  213. p->state = s_part_data;
  214. mark = i --;
  215. break;
  216. }
  217. p->lookbehind[2 + p->index] = c;
  218. if ((++ p->index) == p->boundary_length) {
  219. NOTIFY_CB(part_data_end);
  220. p->state = s_part_data_almost_end;
  221. }
  222. break;
  223. case s_part_data_almost_end:
  224. multipart_log("s_part_data_almost_end");
  225. if (c == '-') {
  226. p->state = s_part_data_final_hyphen;
  227. break;
  228. }
  229. if (c == CR) {
  230. p->state = s_part_data_end;
  231. break;
  232. }
  233. return i;
  234. case s_part_data_final_hyphen:
  235. multipart_log("s_part_data_final_hyphen");
  236. if (c == '-') {
  237. NOTIFY_CB(body_end);
  238. p->state = s_end;
  239. break;
  240. }
  241. return i;
  242. case s_part_data_end:
  243. multipart_log("s_part_data_end");
  244. if (c == LF) {
  245. p->state = s_header_field_start;
  246. NOTIFY_CB(part_data_begin);
  247. break;
  248. }
  249. return i;
  250. case s_end:
  251. multipart_log("s_end: %02X", (int) c);
  252. break;
  253. default:
  254. multipart_log("Multipart parser unrecoverable error");
  255. return 0;
  256. }
  257. ++ i;
  258. }
  259. return len;
  260. }