multipart_parser.c 16 KB


  1. <<<<<<< HEAD
  2. /* Based on node-formidable by Felix Geisendörfer
  3. * Igor Afonov - afonov@gmail.com - 2012
  4. * MIT License - http://www.opensource.org/licenses/mit-license.php
  5. */
  6. #include "multipart_parser.h"
  7. #include <stdio.h>
  8. #include <stdarg.h>
  9. #include <string.h>
  /*
   * Debug logger. When DEBUG_MULTIPART is defined, writes a prefixed,
   * printf-style message followed by a newline to stderr; otherwise it
   * does nothing.
   *
   * format - printf-style format string
   * ...    - arguments consumed by format
   *
   * Note: __FILE__ and __LINE__ are expanded inside this function, so the
   * prefix reports this function's location, not the call site.
   */
  static void multipart_log(const char * format, ...)
  {
  #ifdef DEBUG_MULTIPART
      va_list args;
      va_start(args, format);
      fprintf(stderr, "[HTTP_MULTIPART_PARSER] %s:%d: ", __FILE__, __LINE__);
      vfprintf(stderr, format, args);
      fprintf(stderr, "\n");
      /* Every va_start must be matched by va_end before returning. */
      va_end(args);
  #else
      /* Logging disabled: mark the parameter as intentionally unused. */
      (void) format;
  #endif
  }
  /*
   * Invokes the settings callback on_<FOR>(p) if it is set. A non-zero
   * result makes the ENCLOSING function return the current offset `i`.
   * Relies on `p` and `i` being in scope at the expansion site.
   */
  #define NOTIFY_CB(FOR)                                   \
      do {                                                 \
          if (p->settings->on_##FOR &&                     \
              p->settings->on_##FOR(p) != 0) {             \
              return i;                                    \
          }                                                \
      } while (0)
  /*
   * Invokes the settings callback on_<FOR>(p, ptr, len) if it is set. A
   * non-zero result makes the ENCLOSING function return the current
   * offset `i`. Relies on `p` and `i` being in scope at the expansion site.
   */
  #define EMIT_DATA_CB(FOR, ptr, len)                      \
      do {                                                 \
          if (p->settings->on_##FOR &&                     \
              p->settings->on_##FOR(p, ptr, len) != 0) {   \
              return i;                                    \
          }                                                \
      } while (0)
  /* Byte values of the line terminators the parser looks for. */
  #define LF 0x0A  /* line feed, decimal 10 */
  #define CR 0x0D  /* carriage return, decimal 13 */
  38. struct multipart_parser {
  39. void * data;
  40. size_t index;
  41. size_t boundary_length;
  42. unsigned char state;
  43. const multipart_parser_settings* settings;
  44. char* lookbehind;
  45. char multipart_boundary[1];
  46. };
  /*
   * States of the parser state machine, stored in multipart_parser.state.
   * Values are spelled out explicitly; numbering starts at 1.
   */
  enum state {
      s_uninitialized             = 1,
      s_start                     = 2,  /* before any input has been seen */
      s_start_boundary            = 3,  /* matching the opening boundary + CRLF */
      s_header_field_start        = 4,
      s_header_field              = 5,
      s_headers_almost_done       = 6,  /* CR seen where a header name would start */
      s_header_value_start        = 7,  /* skipping spaces after ':' */
      s_header_value              = 8,
      s_header_value_almost_done  = 9,  /* CR seen at end of header value */
      s_part_data_start           = 10,
      s_part_data                 = 11,
      s_part_data_almost_boundary = 12, /* CR seen inside part data */
      s_part_data_boundary        = 13, /* CRLF seen, matching boundary bytes */
      s_part_data_almost_end      = 14, /* full boundary matched */
      s_part_data_end             = 15, /* CR seen after boundary */
      s_part_data_final_hyphen    = 16, /* first '-' seen after boundary */
      s_end                       = 17  /* closing "--" seen; input is ignored */
  };
  66. multipart_parser* multipart_parser_init
  67. (const char *boundary, const multipart_parser_settings* settings) {
  68. multipart_parser* p = malloc(sizeof(multipart_parser) +
  69. strlen(boundary) +
  70. strlen(boundary) + 9);
  71. strcpy(p->multipart_boundary, boundary);
  72. p->boundary_length = strlen(boundary);
  73. p->lookbehind = (p->multipart_boundary + p->boundary_length + 1);
  74. p->index = 0;
  75. p->state = s_start;
  76. p->settings = settings;
  77. return p;
  78. }
  79. void multipart_parser_free(multipart_parser* p) {
  80. free(p);
  81. }
  82. void multipart_parser_set_data(multipart_parser *p, void *data) {
  83. p->data = data;
  84. }
  85. void *multipart_parser_get_data(multipart_parser *p) {
  86. return p->data;
  87. }
  88. size_t multipart_parser_execute(multipart_parser* p, const char *buf, size_t len) {
  89. size_t i = 0;
  90. size_t mark = 0;
  91. char c, cl;
  92. int is_last = 0;
  93. while(i < len) {
  94. c = buf[i];
  95. is_last = (i == (len - 1));
  96. switch (p->state) {
  97. case s_start:
  98. multipart_log("s_start");
  99. p->index = 0;
  100. p->state = s_start_boundary;
  101. /* fallthrough */
  102. case s_start_boundary:
  103. multipart_log("s_start_boundary");
  104. if (p->index == p->boundary_length) {
  105. if (c != CR) {
  106. return i;
  107. }
  108. p->index++;
  109. break;
  110. } else if (p->index == (p->boundary_length + 1)) {
  111. if (c != LF) {
  112. return i;
  113. }
  114. p->index = 0;
  115. NOTIFY_CB(part_data_begin);
  116. p->state = s_header_field_start;
  117. break;
  118. }
  119. if (c != p->multipart_boundary[p->index]) {
  120. return i;
  121. }
  122. p->index++;
  123. break;
  124. case s_header_field_start:
  125. multipart_log("s_header_field_start");
  126. mark = i;
  127. p->state = s_header_field;
  128. /* fallthrough */
  129. case s_header_field:
  130. multipart_log("s_header_field");
  131. if (c == CR) {
  132. p->state = s_headers_almost_done;
  133. break;
  134. }
  135. if (c == ':') {
  136. EMIT_DATA_CB(header_field, buf + mark, i - mark);
  137. p->state = s_header_value_start;
  138. break;
  139. }
  140. cl = tolower(c);
  141. if ((c != '-') && (cl < 'a' || cl > 'z')) {
  142. multipart_log("invalid character in header name");
  143. return i;
  144. }
  145. if (is_last)
  146. EMIT_DATA_CB(header_field, buf + mark, (i - mark) + 1);
  147. break;
  148. case s_headers_almost_done:
  149. multipart_log("s_headers_almost_done");
  150. if (c != LF) {
  151. return i;
  152. }
  153. p->state = s_part_data_start;
  154. break;
  155. case s_header_value_start:
  156. multipart_log("s_header_value_start");
  157. if (c == ' ') {
  158. break;
  159. }
  160. mark = i;
  161. p->state = s_header_value;
  162. /* fallthrough */
  163. case s_header_value:
  164. multipart_log("s_header_value");
  165. if (c == CR) {
  166. EMIT_DATA_CB(header_value, buf + mark, i - mark);
  167. p->state = s_header_value_almost_done;
  168. break;
  169. }
  170. if (is_last)
  171. EMIT_DATA_CB(header_value, buf + mark, (i - mark) + 1);
  172. break;
  173. case s_header_value_almost_done:
  174. multipart_log("s_header_value_almost_done");
  175. if (c != LF) {
  176. return i;
  177. }
  178. p->state = s_header_field_start;
  179. break;
  180. case s_part_data_start:
  181. multipart_log("s_part_data_start");
  182. NOTIFY_CB(headers_complete);
  183. mark = i;
  184. p->state = s_part_data;
  185. /* fallthrough */
  186. case s_part_data:
  187. multipart_log("s_part_data");
  188. if (c == CR) {
  189. EMIT_DATA_CB(part_data, buf + mark, i - mark);
  190. mark = i;
  191. p->state = s_part_data_almost_boundary;
  192. p->lookbehind[0] = CR;
  193. break;
  194. }
  195. if (is_last)
  196. EMIT_DATA_CB(part_data, buf + mark, (i - mark) + 1);
  197. break;
  198. case s_part_data_almost_boundary:
  199. multipart_log("s_part_data_almost_boundary");
  200. if (c == LF) {
  201. p->state = s_part_data_boundary;
  202. p->lookbehind[1] = LF;
  203. p->index = 0;
  204. break;
  205. }
  206. EMIT_DATA_CB(part_data, p->lookbehind, 1);
  207. p->state = s_part_data;
  208. mark = i --;
  209. break;
  210. case s_part_data_boundary:
  211. multipart_log("s_part_data_boundary");
  212. if (p->multipart_boundary[p->index] != c) {
  213. EMIT_DATA_CB(part_data, p->lookbehind, 2 + p->index);
  214. p->state = s_part_data;
  215. mark = i --;
  216. break;
  217. }
  218. p->lookbehind[2 + p->index] = c;
  219. if ((++ p->index) == p->boundary_length) {
  220. NOTIFY_CB(part_data_end);
  221. p->state = s_part_data_almost_end;
  222. }
  223. break;
  224. case s_part_data_almost_end:
  225. multipart_log("s_part_data_almost_end");
  226. if (c == '-') {
  227. p->state = s_part_data_final_hyphen;
  228. break;
  229. }
  230. if (c == CR) {
  231. p->state = s_part_data_end;
  232. break;
  233. }
  234. return i;
  235. case s_part_data_final_hyphen:
  236. multipart_log("s_part_data_final_hyphen");
  237. if (c == '-') {
  238. NOTIFY_CB(body_end);
  239. p->state = s_end;
  240. break;
  241. }
  242. return i;
  243. case s_part_data_end:
  244. multipart_log("s_part_data_end");
  245. if (c == LF) {
  246. p->state = s_header_field_start;
  247. NOTIFY_CB(part_data_begin);
  248. break;
  249. }
  250. return i;
  251. case s_end:
  252. multipart_log("s_end: %02X", (int) c);
  253. break;
  254. default:
  255. multipart_log("Multipart parser unrecoverable error");
  256. return 0;
  257. }
  258. ++ i;
  259. }
  260. return len;
  261. }
  262. =======
  263. /* Based on node-formidable by Felix Geisendörfer
  264. * Igor Afonov - afonov@gmail.com - 2012
  265. * MIT License - http://www.opensource.org/licenses/mit-license.php
  266. */
  267. #include "multipart_parser.h"
  268. #include <stdio.h>
  269. #include <stdarg.h>
  270. #include <string.h>
  /*
   * Debug logger. When DEBUG_MULTIPART is defined, writes a prefixed,
   * printf-style message followed by a newline to stderr; otherwise it
   * does nothing.
   *
   * format - printf-style format string
   * ...    - arguments consumed by format
   *
   * Note: __FILE__ and __LINE__ are expanded inside this function, so the
   * prefix reports this function's location, not the call site.
   */
  static void multipart_log(const char * format, ...)
  {
  #ifdef DEBUG_MULTIPART
      va_list args;
      va_start(args, format);
      fprintf(stderr, "[HTTP_MULTIPART_PARSER] %s:%d: ", __FILE__, __LINE__);
      vfprintf(stderr, format, args);
      fprintf(stderr, "\n");
      /* Every va_start must be matched by va_end before returning. */
      va_end(args);
  #else
      /* Logging disabled: mark the parameter as intentionally unused. */
      (void) format;
  #endif
  }
  /*
   * Invokes the settings callback on_<FOR>(p) if it is set. A non-zero
   * result makes the ENCLOSING function return the current offset `i`.
   * Relies on `p` and `i` being in scope at the expansion site.
   */
  #define NOTIFY_CB(FOR)                                   \
      do {                                                 \
          if (p->settings->on_##FOR &&                     \
              p->settings->on_##FOR(p) != 0) {             \
              return i;                                    \
          }                                                \
      } while (0)
  /*
   * Invokes the settings callback on_<FOR>(p, ptr, len) if it is set. A
   * non-zero result makes the ENCLOSING function return the current
   * offset `i`. Relies on `p` and `i` being in scope at the expansion site.
   */
  #define EMIT_DATA_CB(FOR, ptr, len)                      \
      do {                                                 \
          if (p->settings->on_##FOR &&                     \
              p->settings->on_##FOR(p, ptr, len) != 0) {   \
              return i;                                    \
          }                                                \
      } while (0)
  /* Byte values of the line terminators the parser looks for. */
  #define LF 0x0A  /* line feed, decimal 10 */
  #define CR 0x0D  /* carriage return, decimal 13 */
  299. struct multipart_parser {
  300. void * data;
  301. size_t index;
  302. size_t boundary_length;
  303. unsigned char state;
  304. const multipart_parser_settings* settings;
  305. char* lookbehind;
  306. char multipart_boundary[1];
  307. };
  /*
   * States of the parser state machine, stored in multipart_parser.state.
   * Values are spelled out explicitly; numbering starts at 1.
   */
  enum state {
      s_uninitialized             = 1,
      s_start                     = 2,  /* before any input has been seen */
      s_start_boundary            = 3,  /* matching the opening boundary + CRLF */
      s_header_field_start        = 4,
      s_header_field              = 5,
      s_headers_almost_done       = 6,  /* CR seen where a header name would start */
      s_header_value_start        = 7,  /* skipping spaces after ':' */
      s_header_value              = 8,
      s_header_value_almost_done  = 9,  /* CR seen at end of header value */
      s_part_data_start           = 10,
      s_part_data                 = 11,
      s_part_data_almost_boundary = 12, /* CR seen inside part data */
      s_part_data_boundary        = 13, /* CRLF seen, matching boundary bytes */
      s_part_data_almost_end      = 14, /* full boundary matched */
      s_part_data_end             = 15, /* CR seen after boundary */
      s_part_data_final_hyphen    = 16, /* first '-' seen after boundary */
      s_end                       = 17  /* closing "--" seen; input is ignored */
  };
  327. multipart_parser* multipart_parser_init
  328. (const char *boundary, const multipart_parser_settings* settings) {
  329. multipart_parser* p = malloc(sizeof(multipart_parser) +
  330. strlen(boundary) +
  331. strlen(boundary) + 9);
  332. strcpy(p->multipart_boundary, boundary);
  333. p->boundary_length = strlen(boundary);
  334. p->lookbehind = (p->multipart_boundary + p->boundary_length + 1);
  335. p->index = 0;
  336. p->state = s_start;
  337. p->settings = settings;
  338. return p;
  339. }
  340. void multipart_parser_free(multipart_parser* p) {
  341. free(p);
  342. }
  343. void multipart_parser_set_data(multipart_parser *p, void *data) {
  344. p->data = data;
  345. }
  346. void *multipart_parser_get_data(multipart_parser *p) {
  347. return p->data;
  348. }
  349. size_t multipart_parser_execute(multipart_parser* p, const char *buf, size_t len) {
  350. size_t i = 0;
  351. size_t mark = 0;
  352. char c, cl;
  353. int is_last = 0;
  354. while(i < len) {
  355. c = buf[i];
  356. is_last = (i == (len - 1));
  357. switch (p->state) {
  358. case s_start:
  359. multipart_log("s_start");
  360. p->index = 0;
  361. p->state = s_start_boundary;
  362. /* fallthrough */
  363. case s_start_boundary:
  364. multipart_log("s_start_boundary");
  365. if (p->index == p->boundary_length) {
  366. if (c != CR) {
  367. return i;
  368. }
  369. p->index++;
  370. break;
  371. } else if (p->index == (p->boundary_length + 1)) {
  372. if (c != LF) {
  373. return i;
  374. }
  375. p->index = 0;
  376. NOTIFY_CB(part_data_begin);
  377. p->state = s_header_field_start;
  378. break;
  379. }
  380. if (c != p->multipart_boundary[p->index]) {
  381. return i;
  382. }
  383. p->index++;
  384. break;
  385. case s_header_field_start:
  386. multipart_log("s_header_field_start");
  387. mark = i;
  388. p->state = s_header_field;
  389. /* fallthrough */
  390. case s_header_field:
  391. multipart_log("s_header_field");
  392. if (c == CR) {
  393. p->state = s_headers_almost_done;
  394. break;
  395. }
  396. if (c == ':') {
  397. EMIT_DATA_CB(header_field, buf + mark, i - mark);
  398. p->state = s_header_value_start;
  399. break;
  400. }
  401. cl = tolower(c);
  402. if ((c != '-') && (cl < 'a' || cl > 'z')) {
  403. multipart_log("invalid character in header name");
  404. return i;
  405. }
  406. if (is_last)
  407. EMIT_DATA_CB(header_field, buf + mark, (i - mark) + 1);
  408. break;
  409. case s_headers_almost_done:
  410. multipart_log("s_headers_almost_done");
  411. if (c != LF) {
  412. return i;
  413. }
  414. p->state = s_part_data_start;
  415. break;
  416. case s_header_value_start:
  417. multipart_log("s_header_value_start");
  418. if (c == ' ') {
  419. break;
  420. }
  421. mark = i;
  422. p->state = s_header_value;
  423. /* fallthrough */
  424. case s_header_value:
  425. multipart_log("s_header_value");
  426. if (c == CR) {
  427. EMIT_DATA_CB(header_value, buf + mark, i - mark);
  428. p->state = s_header_value_almost_done;
  429. break;
  430. }
  431. if (is_last)
  432. EMIT_DATA_CB(header_value, buf + mark, (i - mark) + 1);
  433. break;
  434. case s_header_value_almost_done:
  435. multipart_log("s_header_value_almost_done");
  436. if (c != LF) {
  437. return i;
  438. }
  439. p->state = s_header_field_start;
  440. break;
  441. case s_part_data_start:
  442. multipart_log("s_part_data_start");
  443. NOTIFY_CB(headers_complete);
  444. mark = i;
  445. p->state = s_part_data;
  446. /* fallthrough */
  447. case s_part_data:
  448. multipart_log("s_part_data");
  449. if (c == CR) {
  450. EMIT_DATA_CB(part_data, buf + mark, i - mark);
  451. mark = i;
  452. p->state = s_part_data_almost_boundary;
  453. p->lookbehind[0] = CR;
  454. break;
  455. }
  456. if (is_last)
  457. EMIT_DATA_CB(part_data, buf + mark, (i - mark) + 1);
  458. break;
  459. case s_part_data_almost_boundary:
  460. multipart_log("s_part_data_almost_boundary");
  461. if (c == LF) {
  462. p->state = s_part_data_boundary;
  463. p->lookbehind[1] = LF;
  464. p->index = 0;
  465. break;
  466. }
  467. EMIT_DATA_CB(part_data, p->lookbehind, 1);
  468. p->state = s_part_data;
  469. mark = i --;
  470. break;
  471. case s_part_data_boundary:
  472. multipart_log("s_part_data_boundary");
  473. if (p->multipart_boundary[p->index] != c) {
  474. EMIT_DATA_CB(part_data, p->lookbehind, 2 + p->index);
  475. p->state = s_part_data;
  476. mark = i --;
  477. break;
  478. }
  479. p->lookbehind[2 + p->index] = c;
  480. if ((++ p->index) == p->boundary_length) {
  481. NOTIFY_CB(part_data_end);
  482. p->state = s_part_data_almost_end;
  483. }
  484. break;
  485. case s_part_data_almost_end:
  486. multipart_log("s_part_data_almost_end");
  487. if (c == '-') {
  488. p->state = s_part_data_final_hyphen;
  489. break;
  490. }
  491. if (c == CR) {
  492. p->state = s_part_data_end;
  493. break;
  494. }
  495. return i;
  496. case s_part_data_final_hyphen:
  497. multipart_log("s_part_data_final_hyphen");
  498. if (c == '-') {
  499. NOTIFY_CB(body_end);
  500. p->state = s_end;
  501. break;
  502. }
  503. return i;
  504. case s_part_data_end:
  505. multipart_log("s_part_data_end");
  506. if (c == LF) {
  507. p->state = s_header_field_start;
  508. NOTIFY_CB(part_data_begin);
  509. break;
  510. }
  511. return i;
  512. case s_end:
  513. multipart_log("s_end: %02X", (int) c);
  514. break;
  515. default:
  516. multipart_log("Multipart parser unrecoverable error");
  517. return 0;
  518. }
  519. ++ i;
  520. }
  521. return len;
  522. }
  523. >>>>>>> 08a3f61f844fd56297a491016c3f540974b6f434