libs/http/src/server/route_abnf.cpp

93.6% Lines (248/265) 100.0% Functions (34/34) 85.3% Branches (186/218)
libs/http/src/server/route_abnf.cpp
Line Branch Hits Source Code
1 //
2 // Copyright (c) 2025 Vinnie Falco (vinnie dot falco at gmail dot com)
3 //
4 // Distributed under the Boost Software License, Version 1.0. (See accompanying
5 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6 //
7 // Official repository: https://github.com/cppalliance/http
8 //
9
10 #include "src/server/route_abnf.hpp"
11 #include <boost/url/grammar/error.hpp>
12
13 namespace boost {
14 namespace http {
15 namespace detail {
16
17 namespace {
18
19 //------------------------------------------------
20 // Character classification
21 //------------------------------------------------
22
// Returns true when `c` has syntactic meaning inside a route pattern:
// the group braces, the reserved punctuation, the ':' and '*' sigils,
// and the backslash escape. Text tokens end at any of these.
constexpr bool
is_special(char c) noexcept
{
    return
        c == '{' || c == '}' ||
        c == '(' || c == ')' ||
        c == '[' || c == ']' ||
        c == '+' || c == '?' ||
        c == '!' || c == ':' ||
        c == '*' || c == '\\';
}
46
// Returns true for characters the grammar reserves: they are
// recognized by the parser but always rejected as a syntax error.
constexpr bool
is_reserved(char c) noexcept
{
    return
        c == '(' || c == ')' ||
        c == '[' || c == ']' ||
        c == '+' || c == '?' ||
        c == '!';
}
65
// Returns true if `c` may begin an identifier: an ASCII letter,
// an underscore, or a dollar sign (ASCII subset of ID_Start).
constexpr bool
is_id_start(char c) noexcept
{
    if(c == '_' || c == '$')
        return true;
    if(c >= 'A' && c <= 'Z')
        return true;
    return c >= 'a' && c <= 'z';
}
75
76 // Valid identifier continuation (ASCII subset of ID_Continue)
77 constexpr bool
78 73 is_id_continue(char c) noexcept
79 {
80 return
81
4/4
✓ Branch 1 taken 17 times.
✓ Branch 2 taken 56 times.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 15 times.
75 is_id_start(c) ||
82
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
75 (c >= '0' && c <= '9');
83 }
84
85 //------------------------------------------------
86 // Parser state
87 //------------------------------------------------
88
89 class parser
90 {
91 char const* it_;
92 char const* end_;
93 core::string_view original_;
94
95 public:
96 136 parser(core::string_view s)
97 136 : it_(s.data())
98 136 , end_(s.data() + s.size())
99 136 , original_(s)
100 {
101 136 }
102
103 bool
104 1564 at_end() const noexcept
105 {
106 1564 return it_ == end_;
107 }
108
109 char
110 1111 peek() const noexcept
111 {
112 1111 return *it_;
113 }
114
115 void
116 61 advance() noexcept
117 {
118 61 ++it_;
119 61 }
120
121 char
122 745 get() noexcept
123 {
124 745 return *it_++;
125 }
126
127 std::size_t
128 pos() const noexcept
129 {
130 return static_cast<std::size_t>(
131 it_ - original_.data());
132 }
133
134 //--------------------------------------------
135 // Name parsing
136 //--------------------------------------------
137
138 // Parse identifier: id-start *id-continue
139 system::result<std::string>
140 29 parse_identifier()
141 {
142
3/6
✓ Branch 1 taken 29 times.
✗ Branch 2 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 29 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 29 times.
29 if(at_end() || !is_id_start(peek()))
143 return grammar::error::mismatch;
144
145 29 std::string result;
146
1/1
✓ Branch 2 taken 29 times.
29 result += get();
147
148
6/6
✓ Branch 1 taken 73 times.
✓ Branch 2 taken 12 times.
✓ Branch 5 taken 56 times.
✓ Branch 6 taken 17 times.
✓ Branch 7 taken 56 times.
✓ Branch 8 taken 29 times.
85 while(!at_end() && is_id_continue(peek()))
149
1/1
✓ Branch 2 taken 56 times.
56 result += get();
150
151 29 return result;
152 29 }
153
154 // Parse quoted name: DQUOTE *quoted-char DQUOTE
155 system::result<std::string>
156 4 parse_quoted_name()
157 {
158
3/6
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 4 times.
✗ Branch 6 not taken.
✓ Branch 7 taken 4 times.
4 if(at_end() || peek() != '"')
159 return grammar::error::mismatch;
160
161 4 advance(); // skip opening quote
162 4 std::string result;
163
164
2/2
✓ Branch 1 taken 35 times.
✓ Branch 2 taken 1 time.
36 while(!at_end())
165 {
166 35 char c = peek();
167
168
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 32 times.
35 if(c == '"')
169 {
170 3 advance(); // skip closing quote
171
2/2
✓ Branch 1 taken 1 time.
✓ Branch 2 taken 2 times.
3 if(result.empty())
172 1 return grammar::error::syntax;
173 2 return result;
174 }
175
176
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 32 times.
32 if(c == '\\')
177 {
178 advance(); // skip backslash
179 if(at_end())
180 return grammar::error::syntax;
181 result += get();
182 }
183 else
184 {
185
1/1
✓ Branch 2 taken 32 times.
32 result += get();
186 }
187 }
188
189 // Unterminated quote
190 1 return grammar::error::syntax;
191 4 }
192
193 // Parse name: identifier / quoted-name
194 system::result<std::string>
195 35 parse_name()
196 {
197
2/2
✓ Branch 1 taken 2 times.
✓ Branch 2 taken 33 times.
35 if(at_end())
198 2 return grammar::error::syntax;
199
200
2/2
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 29 times.
33 if(peek() == '"')
201 4 return parse_quoted_name();
202
203 29 return parse_identifier();
204 }
205
206 //--------------------------------------------
207 // Token parsing
208 //--------------------------------------------
209
210 // Parse text: 1*(char / escaped-char)
211 system::result<route_token>
212 155 parse_text()
213 {
214 155 std::string result;
215
216
2/2
✓ Branch 1 taken 676 times.
✓ Branch 2 taken 107 times.
783 while(!at_end())
217 {
218 676 char c = peek();
219
220 // Stop at special characters
221
2/2
✓ Branch 1 taken 53 times.
✓ Branch 2 taken 623 times.
676 if(is_special(c))
222 {
223
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 47 times.
53 if(c == '\\')
224 {
225 // Escaped character
226 6 advance();
227
2/2
✓ Branch 1 taken 1 time.
✓ Branch 2 taken 5 times.
6 if(at_end())
228 1 return grammar::error::syntax;
229
1/1
✓ Branch 2 taken 5 times.
5 result += get();
230 5 continue;
231 }
232 47 break;
233 }
234
235
1/1
✓ Branch 2 taken 623 times.
623 result += get();
236 }
237
238
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 154 times.
154 if(result.empty())
239 return grammar::error::mismatch;
240
241 154 return route_token(route_token_type::text, std::move(result));
242 155 }
243
244 // Parse param: ":" name
245 system::result<route_token>
246 30 parse_param()
247 {
248
3/6
✓ Branch 1 taken 30 times.
✗ Branch 2 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 30 times.
✗ Branch 6 not taken.
✓ Branch 7 taken 30 times.
30 if(at_end() || peek() != ':')
249 return grammar::error::mismatch;
250
251 30 advance(); // skip ':'
252
253
1/1
✓ Branch 1 taken 30 times.
30 auto rv = parse_name();
254
2/2
✓ Branch 1 taken 3 times.
✓ Branch 2 taken 27 times.
30 if(rv.has_error())
255 3 return rv.error();
256
257 54 return route_token(
258
1/1
✓ Branch 2 taken 27 times.
81 route_token_type::param, std::move(rv.value()));
259 30 }
260
261 // Parse wildcard: "*" name
262 system::result<route_token>
263 5 parse_wildcard()
264 {
265
3/6
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 5 times.
✗ Branch 6 not taken.
✓ Branch 7 taken 5 times.
5 if(at_end() || peek() != '*')
266 return grammar::error::mismatch;
267
268 5 advance(); // skip '*'
269
270
1/1
✓ Branch 1 taken 5 times.
5 auto rv = parse_name();
271
2/2
✓ Branch 1 taken 1 time.
✓ Branch 2 taken 4 times.
5 if(rv.has_error())
272 1 return rv.error();
273
274 8 return route_token(
275
1/1
✓ Branch 2 taken 4 times.
12 route_token_type::wildcard, std::move(rv.value()));
276 5 }
277
278 // Parse group: "{" *token "}"
279 system::result<route_token>
280 7 parse_group()
281 {
282
3/6
✓ Branch 1 taken 7 times.
✗ Branch 2 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 7 times.
✗ Branch 6 not taken.
✓ Branch 7 taken 7 times.
7 if(at_end() || peek() != '{')
283 return grammar::error::mismatch;
284
285 7 advance(); // skip '{'
286
287 7 route_token group;
288 7 group.type = route_token_type::group;
289
290 // Parse tokens until '}'
291
6/6
✓ Branch 1 taken 16 times.
✓ Branch 2 taken 1 time.
✓ Branch 4 taken 10 times.
✓ Branch 5 taken 6 times.
✓ Branch 6 taken 10 times.
✓ Branch 7 taken 7 times.
17 while(!at_end() && peek() != '}')
292 {
293
1/1
✓ Branch 1 taken 10 times.
10 auto rv = parse_token();
294
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 10 times.
10 if(rv.has_error())
295 return rv.error();
296
2/2
✓ Branch 2 taken 10 times.
✓ Branch 6 taken 10 times.
10 group.children.push_back(std::move(rv.value()));
297 10 }
298
299
2/2
✓ Branch 1 taken 1 time.
✓ Branch 2 taken 6 times.
7 if(at_end())
300 1 return grammar::error::syntax; // unclosed group
301
302 6 advance(); // skip '}'
303
304 6 return group;
305 7 }
306
307 // Parse single token
308 system::result<route_token>
309 203 parse_token()
310 {
311
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 203 times.
203 if(at_end())
312 return grammar::error::syntax;
313
314 203 char c = peek();
315
316 // Check for reserved characters
317
2/2
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 198 times.
203 if(is_reserved(c))
318 5 return grammar::error::syntax;
319
320 // Try each token type
321
2/2
✓ Branch 0 taken 30 times.
✓ Branch 1 taken 168 times.
198 if(c == ':')
322 30 return parse_param();
323
324
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 163 times.
168 if(c == '*')
325 5 return parse_wildcard();
326
327
2/2
✓ Branch 0 taken 7 times.
✓ Branch 1 taken 156 times.
163 if(c == '{')
328 7 return parse_group();
329
330
2/2
✓ Branch 0 taken 1 time.
✓ Branch 1 taken 155 times.
156 if(c == '}')
331 1 return grammar::error::syntax; // unexpected '}'
332
333 // Must be text
334 155 return parse_text();
335 }
336
337 // Parse entire pattern
338 system::result<std::vector<route_token>>
339 136 parse_tokens()
340 {
341 136 std::vector<route_token> tokens;
342
343
2/2
✓ Branch 1 taken 193 times.
✓ Branch 2 taken 124 times.
317 while(!at_end())
344 {
345
1/1
✓ Branch 1 taken 193 times.
193 auto rv = parse_token();
346
2/2
✓ Branch 1 taken 12 times.
✓ Branch 2 taken 181 times.
193 if(rv.has_error())
347 12 return rv.error();
348
2/2
✓ Branch 2 taken 181 times.
✓ Branch 6 taken 181 times.
181 tokens.push_back(std::move(rv.value()));
349 193 }
350
351 124 return tokens;
352 136 }
353 };
354
355 //------------------------------------------------
356 // Case-insensitive comparison
357 //------------------------------------------------
358
// Compare two characters, treating ASCII 'A'-'Z' as equal to
// their lowercase counterparts. Other characters compare exactly.
bool
ci_equal(char a, char b) noexcept
{
    // 'a' - 'A' == 32 in ASCII
    auto const fold = [](char ch) noexcept -> char
    {
        return (ch >= 'A' && ch <= 'Z')
            ? static_cast<char>(ch + 32)
            : ch;
    };
    return fold(a) == fold(b);
}
368
369 bool
370 123 ci_starts_with(
371 core::string_view str,
372 core::string_view prefix) noexcept
373 {
374
2/2
✓ Branch 2 taken 10 times.
✓ Branch 3 taken 113 times.
123 if(prefix.size() > str.size())
375 10 return false;
376
2/2
✓ Branch 1 taken 502 times.
✓ Branch 2 taken 106 times.
608 for(std::size_t i = 0; i < prefix.size(); ++i)
377 {
378
2/2
✓ Branch 3 taken 7 times.
✓ Branch 4 taken 495 times.
502 if(!ci_equal(str[i], prefix[i]))
379 7 return false;
380 }
381 106 return true;
382 }
383
384 //------------------------------------------------
385 // Route matcher
386 //------------------------------------------------
387
388 class route_matcher
389 {
390 core::string_view path_;
391 match_options const& opts_;
392 std::vector<std::pair<std::string, std::string>> params_;
393 std::size_t pos_ = 0;
394
395 public:
396 102 route_matcher(
397 core::string_view path,
398 match_options const& opts)
399 102 : path_(path)
400 102 , opts_(opts)
401 {
402 102 }
403
404 76 bool at_end() const noexcept
405 {
406 76 return pos_ >= path_.size();
407 }
408
409 87 std::size_t pos() const noexcept
410 {
411 87 return pos_;
412 }
413
414 std::vector<std::pair<std::string, std::string>> const&
415 87 params() const noexcept
416 {
417 87 return params_;
418 }
419
420 // Match text token
421 129 bool match_text(core::string_view text)
422 {
423
1/1
✓ Branch 1 taken 129 times.
129 auto remaining = path_.substr(pos_);
424
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 123 times.
129 if(opts_.case_sensitive)
425 {
426
2/2
✓ Branch 1 taken 3 times.
✓ Branch 2 taken 3 times.
6 if(!remaining.starts_with(text))
427 3 return false;
428 }
429 else
430 {
431
2/2
✓ Branch 1 taken 17 times.
✓ Branch 2 taken 106 times.
123 if(!ci_starts_with(remaining, text))
432 17 return false;
433 }
434 109 pos_ += text.size();
435 109 return true;
436 }
437
438 // Match param token - capture until stop_char, '/' or end
439 26 bool match_param(std::string const& name, char stop_char = '\0')
440 {
441
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 26 times.
26 if(at_end())
442 return false;
443
444 26 auto start = pos_;
445
6/6
✓ Branch 1 taken 84 times.
✓ Branch 2 taken 11 times.
✓ Branch 4 taken 70 times.
✓ Branch 5 taken 14 times.
✓ Branch 6 taken 70 times.
✓ Branch 7 taken 25 times.
95 while(pos_ < path_.size() && path_[pos_] != '/')
446 {
447 // Stop at delimiter if specified
448
6/6
✓ Branch 0 taken 29 times.
✓ Branch 1 taken 41 times.
✓ Branch 3 taken 1 time.
✓ Branch 4 taken 28 times.
✓ Branch 5 taken 1 time.
✓ Branch 6 taken 69 times.
70 if(stop_char != '\0' && path_[pos_] == stop_char)
449 1 break;
450 69 ++pos_;
451 }
452
453 // Param must capture at least one character
454
2/2
✓ Branch 0 taken 1 time.
✓ Branch 1 taken 25 times.
26 if(pos_ == start)
455 1 return false;
456
457
1/1
✓ Branch 1 taken 25 times.
50 params_.emplace_back(
458 name,
459
2/2
✓ Branch 1 taken 25 times.
✓ Branch 4 taken 25 times.
50 std::string(path_.substr(start, pos_ - start)));
460 25 return true;
461 }
462
463 // Match wildcard token - capture everything to end
464 4 bool match_wildcard(std::string const& name)
465 {
466
2/2
✓ Branch 1 taken 1 time.
✓ Branch 2 taken 3 times.
4 if(at_end())
467 1 return false;
468
469 3 auto start = pos_;
470 3 pos_ = path_.size();
471
472 // Wildcard must capture at least one character
473
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
3 if(pos_ == start)
474 return false;
475
476
1/1
✓ Branch 1 taken 3 times.
6 params_.emplace_back(
477 name,
478
2/2
✓ Branch 1 taken 3 times.
✓ Branch 4 taken 3 times.
6 std::string(path_.substr(start)));
479 3 return true;
480 }
481
482 // Get the first character of the next meaningful token
483 // Returns '\0' if none exists or next token is not text
484 static char
485 176 get_stop_char(
486 std::vector<route_token> const& tokens,
487 std::size_t next_idx)
488 {
489
2/2
✓ Branch 1 taken 113 times.
✓ Branch 2 taken 63 times.
176 if(next_idx >= tokens.size())
490 113 return '\0';
491
492 63 auto const& next = tokens[next_idx];
493
5/6
✓ Branch 0 taken 15 times.
✓ Branch 1 taken 48 times.
✓ Branch 3 taken 15 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 15 times.
✓ Branch 6 taken 48 times.
63 if(next.type == route_token_type::text && !next.value.empty())
494 15 return next.value[0];
495
496 48 return '\0';
497 }
498
499 // Match a sequence of tokens
500 119 bool match_tokens(std::vector<route_token> const& tokens)
501 {
502
2/2
✓ Branch 1 taken 176 times.
✓ Branch 2 taken 97 times.
273 for(std::size_t i = 0; i < tokens.size(); ++i)
503 {
504
2/2
✓ Branch 3 taken 22 times.
✓ Branch 4 taken 154 times.
176 if(!match_token(tokens[i], get_stop_char(tokens, i + 1)))
505 22 return false;
506 }
507 97 return true;
508 }
509
510 // Match a single token
511 176 bool match_token(route_token const& token, char stop_char = '\0')
512 {
513
4/5
✓ Branch 0 taken 129 times.
✓ Branch 1 taken 26 times.
✓ Branch 2 taken 4 times.
✓ Branch 3 taken 17 times.
✗ Branch 4 not taken.
176 switch(token.type)
514 {
515 129 case route_token_type::text:
516
1/1
✓ Branch 2 taken 129 times.
129 return match_text(token.value);
517
518 26 case route_token_type::param:
519 26 return match_param(token.value, stop_char);
520
521 4 case route_token_type::wildcard:
522 4 return match_wildcard(token.value);
523
524 17 case route_token_type::group:
525 17 return match_group(token.children);
526
527 default:
528 return false;
529 }
530 }
531
532 // Match group - try with contents, then without
533 17 bool match_group(std::vector<route_token> const& children)
534 {
535 // Save state before trying group
536 17 auto saved_pos = pos_;
537 17 auto saved_params_size = params_.size();
538
539 // Try matching with group contents
540
2/2
✓ Branch 1 taken 9 times.
✓ Branch 2 taken 8 times.
17 if(match_tokens(children))
541 9 return true;
542
543 // Restore state and try without group
544 8 pos_ = saved_pos;
545 8 params_.resize(saved_params_size);
546 8 return true; // Group is optional, always succeeds if skipped
547 }
548
549 // Check if match is complete based on options
550 88 bool is_complete() const
551 {
552
2/2
✓ Branch 0 taken 42 times.
✓ Branch 1 taken 46 times.
88 if(!opts_.end)
553 42 return true; // Prefix match always succeeds
554
555
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 44 times.
46 if(opts_.strict)
556 2 return at_end();
557
558 // Non-strict: allow trailing slash
559
2/2
✓ Branch 1 taken 42 times.
✓ Branch 2 taken 2 times.
44 if(at_end())
560 42 return true;
561
3/6
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
2 if(pos_ == path_.size() - 1 && path_[pos_] == '/')
562 2 return true;
563
564 return false;
565 }
566 };
567
568 } // anonymous namespace
569
570 //------------------------------------------------
571
572 system::result<route_pattern>
573 136 parse_route_pattern(core::string_view pattern)
574 {
575 136 parser p(pattern);
576
1/1
✓ Branch 1 taken 136 times.
136 auto rv = p.parse_tokens();
577
2/2
✓ Branch 1 taken 12 times.
✓ Branch 2 taken 124 times.
136 if(rv.has_error())
578 12 return rv.error();
579
580 124 route_pattern result;
581
1/1
✓ Branch 2 taken 124 times.
124 result.tokens = std::move(rv.value());
582
1/1
✓ Branch 1 taken 124 times.
124 result.original = std::string(pattern);
583 124 return result;
584 136 }
585
586 //------------------------------------------------
587
588 system::result<match_params>
589 102 match_route(
590 core::string_view path,
591 route_pattern const& pattern,
592 match_options const& opts)
593 {
594 102 route_matcher m(path, opts);
595
596
3/3
✓ Branch 1 taken 102 times.
✓ Branch 3 taken 14 times.
✓ Branch 4 taken 88 times.
102 if(!m.match_tokens(pattern.tokens))
597 14 return grammar::error::mismatch;
598
599
2/2
✓ Branch 1 taken 1 time.
✓ Branch 2 taken 87 times.
88 if(!m.is_complete())
600 1 return grammar::error::mismatch;
601
602 87 match_params result;
603
1/1
✓ Branch 2 taken 87 times.
87 result.params = m.params();
604 87 result.matched_length = m.pos();
605 87 return result;
606 102 }
607
608 } // detail
609 } // http
610 } // boost
611