Line data Source code
1 : //
2 : // Copyright (c) 2019 Vinnie Falco (vinnie.falco@gmail.com)
3 : // Copyright (c) 2024 Mohammad Nejati
4 : //
5 : // Distributed under the Boost Software License, Version 1.0. (See accompanying
6 : // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
7 : //
8 : // Official repository: https://github.com/cppalliance/http
9 : //
10 :
11 : #ifndef BOOST_HTTP_PARSER_HPP
12 : #define BOOST_HTTP_PARSER_HPP
13 :
#include <boost/http/config.hpp>
#include <boost/http/detail/header.hpp>
#include <boost/http/detail/type_traits.hpp>
#include <boost/http/error.hpp>

#include <boost/capy/buffers/buffer_copy.hpp>
#include <boost/capy/buffers/buffer_pair.hpp>
#include <boost/capy/buffers/slice.hpp>
#include <boost/capy/concept/read_stream.hpp>
#include <boost/capy/concept/write_sink.hpp>
#include <boost/capy/cond.hpp>
#include <boost/capy/error.hpp>
#include <boost/capy/io_task.hpp>
#include <boost/core/span.hpp>

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <span>
32 :
33 : namespace boost {
34 : namespace http {
35 :
// Forward declarations.
//
// request_parser and response_parser are named as friends of
// parser; static_request and static_response only appear as
// reference return types in parser's private interface, so
// incomplete types are sufficient in this header.
class request_parser;
class response_parser;
class static_request;
class static_response;
41 :
42 : //------------------------------------------------
43 :
44 : /** A parser for HTTP/1 messages.
45 :
46 : This parser uses a single block of memory allocated
47 : during construction and guarantees it will never
48 : exceed the specified size. This space is reused for
49 : parsing multiple HTTP messages ( one at a time ).
50 :
51 : The allocated space is used for:
52 :
53 : @li Buffering raw input from a socket
54 : @li Storing HTTP headers with O(1) access to
55 : method, target, and status code
56 : @li Storing all or part of an HTTP message body
57 : @li Storing state for inflate algorithms
58 :
59 : The parser is strict. Any malformed input according
60 : to the HTTP ABNFs is treated as an unrecoverable
61 : error.
62 :
63 : @see
64 : @ref response_parser,
65 : @ref request_parser.
66 : */
67 : class parser
68 : {
69 : public:
70 : template<capy::ReadStream Stream>
71 : class source;
72 :
73 : /// Buffer type returned from @ref prepare.
74 : using mutable_buffers_type =
75 : boost::span<capy::mutable_buffer const>;
76 :
77 : /// Buffer type returned from @ref pull_body.
78 : using const_buffers_type =
79 : boost::span<capy::const_buffer const>;
80 :
81 : //--------------------------------------------
82 : //
83 : // Observers
84 : //
85 : //--------------------------------------------
86 :
87 : /// Check if a complete header has been parsed.
88 : BOOST_HTTP_DECL
89 : bool
90 : got_header() const noexcept;
91 :
92 : /// Check if a complete message has been parsed.
93 : BOOST_HTTP_DECL
94 : bool
95 : is_complete() const noexcept;
96 :
97 : //--------------------------------------------
98 : //
99 : // Modifiers
100 : //
101 : //--------------------------------------------
102 :
103 : /// Prepare for a new stream.
104 : BOOST_HTTP_DECL
105 : void
106 : reset() noexcept;
107 :
108 : /** Prepare for a new message.
109 :
110 : @par Preconditions
111 : Either this is the first message in the stream,
112 : or the previous message has been fully parsed.
113 : */
114 : BOOST_HTTP_DECL
115 : void
116 : start();
117 :
118 : /** Return a buffer for reading input.
119 :
120 : After writing to the buffer, call @ref commit
121 : with the number of bytes written.
122 :
123 : @par Preconditions
124 : @ref parse returned @ref condition::need_more_input.
125 :
126 : @par Postconditions
127 : A call to @ref commit or @ref commit_eof is
128 : required before calling @ref prepare again.
129 :
130 : @par Exception Safety
131 : Strong guarantee.
132 :
133 : @return A non-empty mutable buffer.
134 :
135 : @see @ref commit, @ref commit_eof.
136 : */
137 : BOOST_HTTP_DECL
138 : mutable_buffers_type
139 : prepare();
140 :
141 : /** Commit bytes to the input buffer.
142 :
143 : @par Preconditions
144 : @li `n <= capy::buffer_size( this->prepare() )`
145 : @li No prior call to @ref commit or @ref commit_eof
146 : since the last @ref prepare
147 :
148 : @par Postconditions
149 : Buffers from @ref prepare are invalidated.
150 :
151 : @par Exception Safety
152 : Strong guarantee.
153 :
154 : @param n The number of bytes written.
155 :
156 : @see @ref parse, @ref prepare.
157 : */
158 : BOOST_HTTP_DECL
159 : void
160 : commit(
161 : std::size_t n);
162 :
163 : /** Indicate end of input.
164 :
165 : Call this when the underlying stream has closed
166 : and no more data will arrive.
167 :
168 : @par Postconditions
169 : Buffers from @ref prepare are invalidated.
170 :
171 : @par Exception Safety
172 : Strong guarantee.
173 :
174 : @see @ref parse, @ref prepare.
175 : */
176 : BOOST_HTTP_DECL
177 : void
178 : commit_eof();
179 :
180 : /** Parse pending input data.
181 :
182 : Returns immediately after the header is fully
183 : parsed to allow @ref set_body_limit to be called
184 : before body parsing begins. If an error occurs
185 : during body parsing, the parsed header remains
186 : valid and accessible.
187 :
188 : When `ec == condition::need_more_input`, read
189 : more data and call @ref commit before calling
190 : this function again.
191 :
192 : When `ec == error::end_of_stream`, the stream
193 : closed cleanly. Call @ref reset to reuse the
194 : parser for a new stream.
195 :
196 : @param ec Set to the error, if any occurred.
197 :
198 : @see @ref start, @ref prepare, @ref commit.
199 : */
200 : BOOST_HTTP_DECL
201 : void
202 : parse(
203 : system::error_code& ec);
204 :
205 : /** Set maximum body size for the current message.
206 :
207 : Overrides @ref parser_config::body_limit for this
208 : message only. The limit resets to the default
209 : for subsequent messages.
210 :
211 : @par Preconditions
212 : `this->got_header() == true` and body parsing
213 : has not started.
214 :
215 : @par Exception Safety
216 : Strong guarantee.
217 :
218 : @param n The body size limit in bytes.
219 :
220 : @see @ref parser_config::body_limit.
221 : */
222 : BOOST_HTTP_DECL
223 : void
224 : set_body_limit(std::uint64_t n);
225 :
226 : /** Return available body data.
227 :
228 : Use this to incrementally process body data.
229 : Call @ref consume_body after processing to
230 : release the buffer space.
231 :
232 : @par Example
233 : @code
234 : request_parser pr( ctx );
235 : pr.start();
236 : co_await pr.read_header( stream );
237 :
238 : while( ! pr.is_complete() )
239 : {
240 : co_await read_some( stream, pr );
241 : auto cbs = pr.pull_body();
242 : // process cbs ...
243 : pr.consume_body( capy::buffer_size( cbs ) );
244 : }
245 : @endcode
246 :
247 : @par Preconditions
248 : `this->got_header() == true`
249 :
250 : @par Postconditions
251 : The returned buffer is invalidated by any
252 : modifying member function.
253 :
254 : @par Exception Safety
255 : Strong guarantee.
256 :
257 : @return Buffers containing available body data.
258 :
259 : @see @ref consume_body.
260 : */
261 : BOOST_HTTP_DECL
262 : const_buffers_type
263 : pull_body();
264 :
265 : /** Consume bytes from available body data.
266 :
267 : @par Preconditions
268 : `n <= capy::buffer_size( this->pull_body() )`
269 :
270 : @par Exception Safety
271 : Strong guarantee.
272 :
273 : @param n The number of bytes to consume.
274 :
275 : @see @ref pull_body.
276 : */
277 : BOOST_HTTP_DECL
278 : void
279 : consume_body(std::size_t n);
280 :
281 : /** Return the complete body.
282 :
283 : Use this when the entire message fits within
284 : the parser's internal buffer.
285 :
286 : @par Example
287 : @code
288 : request_parser pr( ctx );
289 : pr.start();
290 : co_await pr.read_header( stream );
291 : // ... read entire body ...
292 : core::string_view body = pr.body();
293 : @endcode
294 :
295 : @par Preconditions
296 : @li `this->is_complete() == true`
297 : @li No previous call to @ref consume_body
298 :
299 : @par Exception Safety
300 : Strong guarantee.
301 :
302 : @return A string view of the complete body.
303 :
304 : @see @ref is_complete.
305 : */
306 : BOOST_HTTP_DECL
307 : core::string_view
308 : body() const;
309 :
310 : /** Return unconsumed data past the last message.
311 :
312 : Use this after an upgrade or CONNECT request
313 : to retrieve protocol-dependent data that
314 : follows the HTTP message.
315 :
316 : @return A string view of leftover data.
317 :
318 : @see @ref metadata::upgrade, @ref metadata::connection.
319 : */
320 : BOOST_HTTP_DECL
321 : core::string_view
322 : release_buffered_data() noexcept;
323 :
324 : /** Asynchronously read the HTTP headers.
325 :
326 : Reads from the stream until the headers are
327 : complete or an error occurs.
328 :
329 : @par Preconditions
330 : @li @ref reset has been called
331 : @li @ref start has been called
332 :
333 : @param stream The stream to read from.
334 :
335 : @return An awaitable yielding `(error_code)`.
336 :
337 : @see @ref read.
338 : */
339 : template<capy::ReadStream Stream>
340 : capy::io_task<>
341 : read_header(Stream& stream);
342 :
343 : /** Asynchronously read body data into buffers.
344 :
345 : Reads from the stream and copies body data into
346 : the provided buffers with complete-fill semantics.
347 : Returns `capy::error::eof` when the body is complete.
348 :
349 : @par Preconditions
350 : @li @ref reset has been called
351 : @li @ref start has been called
352 :
353 : @param stream The stream to read from.
354 :
355 : @param buffers The buffers to read into.
356 :
357 : @return An awaitable yielding `(error_code,std::size_t)`.
358 :
359 : @see @ref read_header.
360 : */
361 : template<capy::ReadStream Stream, capy::MutableBufferSequence MB>
362 : capy::io_task<std::size_t>
363 : read(Stream& stream, MB buffers);
364 :
365 : /** Return a source for reading body data.
366 :
367 : The returned source satisfies @ref capy::BufferSource.
368 : On first pull, headers are automatically parsed if
369 : not yet received.
370 :
371 : @par Example
372 : @code
373 : request_parser pr( ctx );
374 : pr.start();
375 : auto body = pr.source_for( socket );
376 :
377 : capy::const_buffer arr[16];
378 : auto [ec, bufs] = co_await body.pull( arr );
379 : body.consume( buffer_size( bufs ) );
380 : @endcode
381 :
382 : @param stream The stream to read from.
383 :
384 : @return A source satisfying @ref capy::BufferSource.
385 :
386 : @see @ref read_header, @ref capy::BufferSource.
387 : */
388 : template<capy::ReadStream Stream>
389 : source<Stream>
390 : source_for(Stream& stream) noexcept;
391 :
392 : /** Read body from stream and push to a WriteSink.
393 :
394 : Reads body data from the stream and pushes each chunk to
395 : the sink. The sink must consume all bytes from each write.
396 :
397 : @param stream The stream to read body data from.
398 :
399 : @param sink The sink to receive body data.
400 :
401 : @return An awaitable yielding `(error_code)`.
402 :
403 : @see WriteSink.
404 : */
405 : template<capy::WriteSink Sink>
406 : capy::io_task<>
407 : read(capy::ReadStream auto& stream, Sink&& sink);
408 :
409 : private:
410 : friend class request_parser;
411 : friend class response_parser;
412 : class impl;
413 :
414 : BOOST_HTTP_DECL ~parser();
415 : BOOST_HTTP_DECL parser() noexcept;
416 : BOOST_HTTP_DECL parser(parser&& other) noexcept;
417 : BOOST_HTTP_DECL parser(
418 : std::shared_ptr<parser_config_impl const> cfg,
419 : detail::kind k);
420 : BOOST_HTTP_DECL void assign(parser&& other) noexcept;
421 :
422 : BOOST_HTTP_DECL
423 : void
424 : start_impl(bool);
425 :
426 : static_request const&
427 : safe_get_request() const;
428 :
429 : static_response const&
430 : safe_get_response() const;
431 :
432 : impl* impl_;
433 : };
434 :
435 : /** A source for reading the message body.
436 :
437 : This type satisfies @ref capy::BufferSource. It can be
438 : constructed immediately after parser construction; on
439 : first pull, headers are automatically parsed if not
440 : yet received.
441 :
442 : @tparam Stream A type satisfying @ref capy::ReadStream.
443 :
444 : @see @ref parser::source_for.
445 : */
446 : template<capy::ReadStream Stream>
447 : class parser::source
448 : {
449 : Stream* stream_;
450 : parser* pr_;
451 :
452 : public:
453 : /// Default constructor.
454 : source() noexcept
455 : : stream_(nullptr)
456 : , pr_(nullptr)
457 : {
458 : }
459 :
460 : /// Construct a source for reading body data.
461 260 : source(Stream& stream, parser& pr) noexcept
462 260 : : stream_(&stream)
463 260 : , pr_(&pr)
464 : {
465 260 : }
466 :
467 : /** Pull buffer data from the body.
468 :
469 : On first invocation, reads headers if not yet parsed.
470 : Returns buffer descriptors pointing to internal parser
471 : memory. When the body is complete, returns an empty span.
472 :
473 : @param dest Span of const_buffer to fill.
474 :
475 : @return An awaitable yielding `(error_code,std::span<const_buffer>)`.
476 : */
477 : capy::io_task<std::span<capy::const_buffer>>
478 : pull(std::span<capy::const_buffer> dest);
479 :
480 : /** Consume bytes from pulled body data.
481 :
482 : Advances the read position by the specified number of
483 : bytes. The next pull returns data starting after the
484 : consumed bytes.
485 :
486 : @param n The number of bytes to consume.
487 : */
488 : void
489 : consume(std::size_t n) noexcept;
490 : };
491 :
492 : template<capy::ReadStream Stream>
493 : capy::io_task<>
494 654 : parser::
495 : read_header(Stream& stream)
496 : {
497 : system::error_code ec;
498 : for(;;)
499 : {
500 : parse(ec);
501 :
502 : if(got_header())
503 : co_return {};
504 :
505 : if(ec != condition::need_more_input)
506 : co_return {ec};
507 :
508 : auto mbs = prepare();
509 :
510 : auto [read_ec, n] = co_await stream.read_some(mbs);
511 : if(read_ec == capy::cond::eof)
512 : commit_eof();
513 : else if(!read_ec)
514 : commit(n);
515 : else
516 : co_return {read_ec};
517 : }
518 1308 : }
519 :
520 : template<capy::ReadStream Stream, capy::MutableBufferSequence MB>
521 : capy::io_task<std::size_t>
522 : parser::
523 : read(Stream& stream, MB buffers)
524 : {
525 : if(capy::buffer_empty(buffers))
526 : co_return {{}, 0};
527 :
528 : std::size_t total = 0;
529 : auto dest = capy::sans_prefix(buffers, 0);
530 :
531 : for(;;)
532 : {
533 : system::error_code ec;
534 : parse(ec);
535 :
536 : if(got_header())
537 : {
538 : auto body_data = pull_body();
539 : if(capy::buffer_size(body_data) > 0)
540 : {
541 : std::size_t copied = capy::buffer_copy(dest, body_data);
542 : consume_body(copied);
543 : total += copied;
544 : dest = capy::sans_prefix(dest, copied);
545 :
546 : if(capy::buffer_empty(dest))
547 : co_return {{}, total};
548 : }
549 :
550 : if(is_complete())
551 : co_return {capy::error::eof, total};
552 : }
553 :
554 : if(ec == condition::need_more_input)
555 : {
556 : auto mbs = prepare();
557 : auto [read_ec, n] = co_await stream.read_some(mbs);
558 :
559 : if(read_ec == capy::cond::eof)
560 : commit_eof();
561 : else if(!read_ec)
562 : commit(n);
563 : else
564 : co_return {read_ec, total};
565 :
566 : continue;
567 : }
568 :
569 : if(ec)
570 : co_return {ec, total};
571 : }
572 : }
573 :
574 : template<capy::ReadStream Stream>
575 : parser::source<Stream>
576 260 : parser::
577 : source_for(Stream& stream) noexcept
578 : {
579 260 : return source<Stream>(stream, *this);
580 : }
581 :
582 : template<capy::ReadStream Stream>
583 : capy::io_task<std::span<capy::const_buffer>>
584 816 : parser::source<Stream>::
585 : pull(std::span<capy::const_buffer> dest)
586 : {
587 : // Read headers if not yet parsed
588 : if(!pr_->got_header())
589 : {
590 : auto [ec] = co_await pr_->read_header(*stream_);
591 : if(ec)
592 : co_return {ec, {}};
593 : }
594 :
595 : for(;;)
596 : {
597 : system::error_code ec;
598 : pr_->parse(ec);
599 :
600 : auto body_data = pr_->pull_body();
601 : if(capy::buffer_size(body_data) > 0)
602 : {
603 : std::size_t count = (std::min)(body_data.size(), dest.size());
604 : for(std::size_t i = 0; i < count; ++i)
605 : dest[i] = body_data[i];
606 : co_return {{}, dest.first(count)};
607 : }
608 :
609 : if(pr_->is_complete())
610 : co_return {{}, {}};
611 :
612 : if(ec == condition::need_more_input)
613 : {
614 : auto mbs = pr_->prepare();
615 : auto [read_ec, n] = co_await stream_->read_some(mbs);
616 :
617 : if(read_ec == capy::cond::eof)
618 : pr_->commit_eof();
619 : else if(!read_ec)
620 : pr_->commit(n);
621 : else
622 : co_return {read_ec, {}};
623 :
624 : continue;
625 : }
626 :
627 : if(ec)
628 : co_return {ec, {}};
629 : }
630 1632 : }
631 :
632 : template<capy::ReadStream Stream>
633 : void
634 556 : parser::source<Stream>::
635 : consume(std::size_t n) noexcept
636 : {
637 556 : pr_->consume_body(n);
638 556 : }
639 :
640 : template<capy::WriteSink Sink>
641 : capy::io_task<>
642 138 : parser::
643 : read(capy::ReadStream auto& stream, Sink&& sink)
644 : {
645 : for(;;)
646 : {
647 : system::error_code ec;
648 : parse(ec);
649 :
650 : if(got_header())
651 : {
652 : auto body_data = pull_body();
653 : if(capy::buffer_size(body_data) > 0)
654 : {
655 : auto [write_ec, n] = co_await sink.write(body_data);
656 : if(write_ec)
657 : co_return {write_ec};
658 : consume_body(n);
659 : }
660 :
661 : if(is_complete())
662 : {
663 : auto [eof_ec] = co_await sink.write_eof();
664 : co_return {eof_ec};
665 : }
666 : }
667 :
668 : if(ec == condition::need_more_input)
669 : {
670 : auto mbs = prepare();
671 : auto [read_ec, n] = co_await stream.read_some(mbs);
672 :
673 : if(read_ec == capy::cond::eof)
674 : commit_eof();
675 : else if(!read_ec)
676 : commit(n);
677 : else
678 : co_return {read_ec};
679 :
680 : continue;
681 : }
682 :
683 : if(ec)
684 : co_return {ec};
685 : }
686 276 : }
687 :
688 : } // http
689 : } // boost
690 :
691 : #endif
|