libs/http/include/boost/http/parser.hpp

100.0% Lines (15/15) 100.0% Functions (6/6) 100.0% Branches (3/3)
libs/http/include/boost/http/parser.hpp
Line Branch Hits Source Code
1 //
2 // Copyright (c) 2019 Vinnie Falco (vinnie.falco@gmail.com)
3 // Copyright (c) 2024 Mohammad Nejati
4 //
5 // Distributed under the Boost Software License, Version 1.0. (See accompanying
6 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
7 //
8 // Official repository: https://github.com/cppalliance/http
9 //
10
11 #ifndef BOOST_HTTP_PARSER_HPP
12 #define BOOST_HTTP_PARSER_HPP
13
14 #include <boost/http/config.hpp>
15 #include <boost/http/detail/header.hpp>
16 #include <boost/http/detail/type_traits.hpp>
17 #include <boost/http/error.hpp>
18
19 #include <boost/capy/buffers/buffer_copy.hpp>
20 #include <boost/capy/buffers/buffer_pair.hpp>
21 #include <boost/capy/buffers/slice.hpp>
22 #include <boost/capy/concept/read_stream.hpp>
23 #include <boost/capy/concept/write_sink.hpp>
24 #include <boost/capy/cond.hpp>
25 #include <boost/capy/error.hpp>
26 #include <boost/capy/io_task.hpp>
27 #include <boost/core/span.hpp>
28
29 #include <cstddef>
30 #include <cstdint>
31 #include <memory>
32
33 namespace boost {
34 namespace http {
35
36 // Forward declaration
37 class request_parser;
38 class response_parser;
39 class static_request;
40 class static_response;
41
42 //------------------------------------------------
43
44 /** A parser for HTTP/1 messages.
45
46 This parser uses a single block of memory allocated
47 during construction and guarantees it will never
48 exceed the specified size. This space is reused for
49 parsing multiple HTTP messages ( one at a time ).
50
51 The allocated space is used for:
52
53 @li Buffering raw input from a socket
54 @li Storing HTTP headers with O(1) access to
55 method, target, and status code
56 @li Storing all or part of an HTTP message body
57 @li Storing state for inflate algorithms
58
59 The parser is strict. Any malformed input according
60 to the HTTP ABNFs is treated as an unrecoverable
61 error.
62
63 @see
64 @ref response_parser,
65 @ref request_parser.
66 */
67 class parser
68 {
69 public:
70 template<capy::ReadStream Stream>
71 class source;
72
73 /// Buffer type returned from @ref prepare.
74 using mutable_buffers_type =
75 boost::span<capy::mutable_buffer const>;
76
77 /// Buffer type returned from @ref pull_body.
78 using const_buffers_type =
79 boost::span<capy::const_buffer const>;
80
81 //--------------------------------------------
82 //
83 // Observers
84 //
85 //--------------------------------------------
86
87 /// Check if a complete header has been parsed.
88 BOOST_HTTP_DECL
89 bool
90 got_header() const noexcept;
91
92 /// Check if a complete message has been parsed.
93 BOOST_HTTP_DECL
94 bool
95 is_complete() const noexcept;
96
97 //--------------------------------------------
98 //
99 // Modifiers
100 //
101 //--------------------------------------------
102
103 /// Prepare for a new stream.
104 BOOST_HTTP_DECL
105 void
106 reset() noexcept;
107
108 /** Prepare for a new message.
109
110 @par Preconditions
111 Either this is the first message in the stream,
112 or the previous message has been fully parsed.
113 */
114 BOOST_HTTP_DECL
115 void
116 start();
117
118 /** Return a buffer for reading input.
119
120 After writing to the buffer, call @ref commit
121 with the number of bytes written.
122
123 @par Preconditions
124 @ref parse returned @ref condition::need_more_input.
125
126 @par Postconditions
127 A call to @ref commit or @ref commit_eof is
128 required before calling @ref prepare again.
129
130 @par Exception Safety
131 Strong guarantee.
132
133 @return A non-empty mutable buffer.
134
135 @see @ref commit, @ref commit_eof.
136 */
137 BOOST_HTTP_DECL
138 mutable_buffers_type
139 prepare();
140
141 /** Commit bytes to the input buffer.
142
143 @par Preconditions
144 @li `n <= capy::buffer_size( this->prepare() )`
145 @li No prior call to @ref commit or @ref commit_eof
146 since the last @ref prepare
147
148 @par Postconditions
149 Buffers from @ref prepare are invalidated.
150
151 @par Exception Safety
152 Strong guarantee.
153
154 @param n The number of bytes written.
155
156 @see @ref parse, @ref prepare.
157 */
158 BOOST_HTTP_DECL
159 void
160 commit(
161 std::size_t n);
162
163 /** Indicate end of input.
164
165 Call this when the underlying stream has closed
166 and no more data will arrive.
167
168 @par Postconditions
169 Buffers from @ref prepare are invalidated.
170
171 @par Exception Safety
172 Strong guarantee.
173
174 @see @ref parse, @ref prepare.
175 */
176 BOOST_HTTP_DECL
177 void
178 commit_eof();
179
180 /** Parse pending input data.
181
182 Returns immediately after the header is fully
183 parsed to allow @ref set_body_limit to be called
184 before body parsing begins. If an error occurs
185 during body parsing, the parsed header remains
186 valid and accessible.
187
188 When `ec == condition::need_more_input`, read
189 more data and call @ref commit before calling
190 this function again.
191
192 When `ec == error::end_of_stream`, the stream
193 closed cleanly. Call @ref reset to reuse the
194 parser for a new stream.
195
196 @param ec Set to the error, if any occurred.
197
198 @see @ref start, @ref prepare, @ref commit.
199 */
200 BOOST_HTTP_DECL
201 void
202 parse(
203 system::error_code& ec);
204
205 /** Set maximum body size for the current message.
206
207 Overrides @ref parser_config::body_limit for this
208 message only. The limit resets to the default
209 for subsequent messages.
210
211 @par Preconditions
212 `this->got_header() == true` and body parsing
213 has not started.
214
215 @par Exception Safety
216 Strong guarantee.
217
218 @param n The body size limit in bytes.
219
220 @see @ref parser_config::body_limit.
221 */
222 BOOST_HTTP_DECL
223 void
224 set_body_limit(std::uint64_t n);
225
226 /** Return available body data.
227
228 Use this to incrementally process body data.
229 Call @ref consume_body after processing to
230 release the buffer space.
231
232 @par Example
233 @code
234 request_parser pr( ctx );
235 pr.start();
236 co_await pr.read_header( stream );
237
238 while( ! pr.is_complete() )
239 {
240 co_await read_some( stream, pr );
241 auto cbs = pr.pull_body();
242 // process cbs ...
243 pr.consume_body( capy::buffer_size( cbs ) );
244 }
245 @endcode
246
247 @par Preconditions
248 `this->got_header() == true`
249
250 @par Postconditions
251 The returned buffer is invalidated by any
252 modifying member function.
253
254 @par Exception Safety
255 Strong guarantee.
256
257 @return Buffers containing available body data.
258
259 @see @ref consume_body.
260 */
261 BOOST_HTTP_DECL
262 const_buffers_type
263 pull_body();
264
265 /** Consume bytes from available body data.
266
267 @par Preconditions
268 `n <= capy::buffer_size( this->pull_body() )`
269
270 @par Exception Safety
271 Strong guarantee.
272
273 @param n The number of bytes to consume.
274
275 @see @ref pull_body.
276 */
277 BOOST_HTTP_DECL
278 void
279 consume_body(std::size_t n);
280
281 /** Return the complete body.
282
283 Use this when the entire message fits within
284 the parser's internal buffer.
285
286 @par Example
287 @code
288 request_parser pr( ctx );
289 pr.start();
290 co_await pr.read_header( stream );
291 // ... read entire body ...
292 core::string_view body = pr.body();
293 @endcode
294
295 @par Preconditions
296 @li `this->is_complete() == true`
297 @li No previous call to @ref consume_body
298
299 @par Exception Safety
300 Strong guarantee.
301
302 @return A string view of the complete body.
303
304 @see @ref is_complete.
305 */
306 BOOST_HTTP_DECL
307 core::string_view
308 body() const;
309
310 /** Return unconsumed data past the last message.
311
312 Use this after an upgrade or CONNECT request
313 to retrieve protocol-dependent data that
314 follows the HTTP message.
315
316 @return A string view of leftover data.
317
318 @see @ref metadata::upgrade, @ref metadata::connection.
319 */
320 BOOST_HTTP_DECL
321 core::string_view
322 release_buffered_data() noexcept;
323
324 /** Asynchronously read the HTTP headers.
325
326 Reads from the stream until the headers are
327 complete or an error occurs.
328
329 @par Preconditions
330 @li @ref reset has been called
331 @li @ref start has been called
332
333 @param stream The stream to read from.
334
335 @return An awaitable yielding `(error_code)`.
336
337 @see @ref read.
338 */
339 template<capy::ReadStream Stream>
340 capy::io_task<>
341 read_header(Stream& stream);
342
343 /** Asynchronously read body data into buffers.
344
345 Reads from the stream and copies body data into
346 the provided buffers with complete-fill semantics.
347 Returns `capy::error::eof` when the body is complete.
348
349 @par Preconditions
350 @li @ref reset has been called
351 @li @ref start has been called
352
353 @param stream The stream to read from.
354
355 @param buffers The buffers to read into.
356
357 @return An awaitable yielding `(error_code,std::size_t)`.
358
359 @see @ref read_header.
360 */
361 template<capy::ReadStream Stream, capy::MutableBufferSequence MB>
362 capy::io_task<std::size_t>
363 read(Stream& stream, MB buffers);
364
365 /** Return a source for reading body data.
366
367 The returned source satisfies @ref capy::BufferSource.
368 On first pull, headers are automatically parsed if
369 not yet received.
370
371 @par Example
372 @code
373 request_parser pr( ctx );
374 pr.start();
375 auto body = pr.source_for( socket );
376
377 capy::const_buffer arr[16];
378 auto [ec, bufs] = co_await body.pull( arr );
379 body.consume( buffer_size( bufs ) );
380 @endcode
381
382 @param stream The stream to read from.
383
384 @return A source satisfying @ref capy::BufferSource.
385
386 @see @ref read_header, @ref capy::BufferSource.
387 */
388 template<capy::ReadStream Stream>
389 source<Stream>
390 source_for(Stream& stream) noexcept;
391
392 /** Read body from stream and push to a WriteSink.
393
394 Reads body data from the stream and pushes each chunk to
395 the sink. The sink must consume all bytes from each write.
396
397 @param stream The stream to read body data from.
398
399 @param sink The sink to receive body data.
400
401 @return An awaitable yielding `(error_code)`.
402
403 @see WriteSink.
404 */
405 template<capy::WriteSink Sink>
406 capy::io_task<>
407 read(capy::ReadStream auto& stream, Sink&& sink);
408
409 private:
410 friend class request_parser;
411 friend class response_parser;
412 class impl;
413
414 BOOST_HTTP_DECL ~parser();
415 BOOST_HTTP_DECL parser() noexcept;
416 BOOST_HTTP_DECL parser(parser&& other) noexcept;
417 BOOST_HTTP_DECL parser(
418 std::shared_ptr<parser_config_impl const> cfg,
419 detail::kind k);
420 BOOST_HTTP_DECL void assign(parser&& other) noexcept;
421
422 BOOST_HTTP_DECL
423 void
424 start_impl(bool);
425
426 static_request const&
427 safe_get_request() const;
428
429 static_response const&
430 safe_get_response() const;
431
432 impl* impl_;
433 };
434
435 /** A source for reading the message body.
436
437 This type satisfies @ref capy::BufferSource. It can be
438 constructed immediately after parser construction; on
439 first pull, headers are automatically parsed if not
440 yet received.
441
442 @tparam Stream A type satisfying @ref capy::ReadStream.
443
444 @see @ref parser::source_for.
445 */
446 template<capy::ReadStream Stream>
447 class parser::source
448 {
449 Stream* stream_;
450 parser* pr_;
451
452 public:
453 /// Default constructor.
454 source() noexcept
455 : stream_(nullptr)
456 , pr_(nullptr)
457 {
458 }
459
460 /// Construct a source for reading body data.
461 260 source(Stream& stream, parser& pr) noexcept
462 260 : stream_(&stream)
463 260 , pr_(&pr)
464 {
465 260 }
466
467 /** Pull buffer data from the body.
468
469 On first invocation, reads headers if not yet parsed.
470 Returns buffer descriptors pointing to internal parser
471 memory. When the body is complete, returns an empty span.
472
473 @param dest Span of const_buffer to fill.
474
475 @return An awaitable yielding `(error_code,std::span<const_buffer>)`.
476 */
477 capy::io_task<std::span<capy::const_buffer>>
478 pull(std::span<capy::const_buffer> dest);
479
480 /** Consume bytes from pulled body data.
481
482 Advances the read position by the specified number of
483 bytes. The next pull returns data starting after the
484 consumed bytes.
485
486 @param n The number of bytes to consume.
487 */
488 void
489 consume(std::size_t n) noexcept;
490 };
491
492 template<capy::ReadStream Stream>
493 capy::io_task<>
494
1/1
✓ Branch 1 taken 654 times.
654 parser::
495 read_header(Stream& stream)
496 {
497 system::error_code ec;
498 for(;;)
499 {
500 parse(ec);
501
502 if(got_header())
503 co_return {};
504
505 if(ec != condition::need_more_input)
506 co_return {ec};
507
508 auto mbs = prepare();
509
510 auto [read_ec, n] = co_await stream.read_some(mbs);
511 if(read_ec == capy::cond::eof)
512 commit_eof();
513 else if(!read_ec)
514 commit(n);
515 else
516 co_return {read_ec};
517 }
518 1308 }
519
520 template<capy::ReadStream Stream, capy::MutableBufferSequence MB>
521 capy::io_task<std::size_t>
522 parser::
523 read(Stream& stream, MB buffers)
524 {
525 if(capy::buffer_empty(buffers))
526 co_return {{}, 0};
527
528 std::size_t total = 0;
529 auto dest = capy::sans_prefix(buffers, 0);
530
531 for(;;)
532 {
533 system::error_code ec;
534 parse(ec);
535
536 if(got_header())
537 {
538 auto body_data = pull_body();
539 if(capy::buffer_size(body_data) > 0)
540 {
541 std::size_t copied = capy::buffer_copy(dest, body_data);
542 consume_body(copied);
543 total += copied;
544 dest = capy::sans_prefix(dest, copied);
545
546 if(capy::buffer_empty(dest))
547 co_return {{}, total};
548 }
549
550 if(is_complete())
551 co_return {capy::error::eof, total};
552 }
553
554 if(ec == condition::need_more_input)
555 {
556 auto mbs = prepare();
557 auto [read_ec, n] = co_await stream.read_some(mbs);
558
559 if(read_ec == capy::cond::eof)
560 commit_eof();
561 else if(!read_ec)
562 commit(n);
563 else
564 co_return {read_ec, total};
565
566 continue;
567 }
568
569 if(ec)
570 co_return {ec, total};
571 }
572 }
573
574 template<capy::ReadStream Stream>
575 parser::source<Stream>
576 260 parser::
577 source_for(Stream& stream) noexcept
578 {
579 260 return source<Stream>(stream, *this);
580 }
581
582 template<capy::ReadStream Stream>
583 capy::io_task<std::span<capy::const_buffer>>
584
1/1
✓ Branch 1 taken 816 times.
816 parser::source<Stream>::
585 pull(std::span<capy::const_buffer> dest)
586 {
587 // Read headers if not yet parsed
588 if(!pr_->got_header())
589 {
590 auto [ec] = co_await pr_->read_header(*stream_);
591 if(ec)
592 co_return {ec, {}};
593 }
594
595 for(;;)
596 {
597 system::error_code ec;
598 pr_->parse(ec);
599
600 auto body_data = pr_->pull_body();
601 if(capy::buffer_size(body_data) > 0)
602 {
603 std::size_t count = (std::min)(body_data.size(), dest.size());
604 for(std::size_t i = 0; i < count; ++i)
605 dest[i] = body_data[i];
606 co_return {{}, dest.first(count)};
607 }
608
609 if(pr_->is_complete())
610 co_return {{}, {}};
611
612 if(ec == condition::need_more_input)
613 {
614 auto mbs = pr_->prepare();
615 auto [read_ec, n] = co_await stream_->read_some(mbs);
616
617 if(read_ec == capy::cond::eof)
618 pr_->commit_eof();
619 else if(!read_ec)
620 pr_->commit(n);
621 else
622 co_return {read_ec, {}};
623
624 continue;
625 }
626
627 if(ec)
628 co_return {ec, {}};
629 }
630 1632 }
631
632 template<capy::ReadStream Stream>
633 void
634 556 parser::source<Stream>::
635 consume(std::size_t n) noexcept
636 {
637 556 pr_->consume_body(n);
638 556 }
639
640 template<capy::WriteSink Sink>
641 capy::io_task<>
642
1/1
✓ Branch 1 taken 138 times.
138 parser::
643 read(capy::ReadStream auto& stream, Sink&& sink)
644 {
645 for(;;)
646 {
647 system::error_code ec;
648 parse(ec);
649
650 if(got_header())
651 {
652 auto body_data = pull_body();
653 if(capy::buffer_size(body_data) > 0)
654 {
655 auto [write_ec, n] = co_await sink.write(body_data);
656 if(write_ec)
657 co_return {write_ec};
658 consume_body(n);
659 }
660
661 if(is_complete())
662 {
663 auto [eof_ec] = co_await sink.write_eof();
664 co_return {eof_ec};
665 }
666 }
667
668 if(ec == condition::need_more_input)
669 {
670 auto mbs = prepare();
671 auto [read_ec, n] = co_await stream.read_some(mbs);
672
673 if(read_ec == capy::cond::eof)
674 commit_eof();
675 else if(!read_ec)
676 commit(n);
677 else
678 co_return {read_ec};
679
680 continue;
681 }
682
683 if(ec)
684 co_return {ec};
685 }
686 276 }
687
688 } // http
689 } // boost
690
691 #endif
692