15 const result_type* base_url) {
21 constexpr bool result_type_is_ada_url = std::is_same_v<url, result_type>;
22 constexpr bool result_type_is_ada_url_aggregator =
23 std::is_same_v<url_aggregator, result_type>;
24 static_assert(result_type_is_ada_url ||
25 result_type_is_ada_url_aggregator);
28 ada_log(
"ada::parser::parse_url('", user_input,
"' [", user_input.size(),
29 " bytes],", (base_url !=
nullptr ? base_url->to_string() :
"null"),
37 if (user_input.size() > std::numeric_limits<uint32_t>::max()) [[unlikely]] {
43 if (base_url !=
nullptr) {
49 if constexpr (result_type_is_ada_url_aggregator && store_values) {
61 uint32_t reserve_capacity =
63 helpers::leading_zeroes(uint32_t(1 | user_input.size()))) +
65 url.reserve(reserve_capacity);
67 std::string tmp_buffer;
68 std::string_view url_data;
69 if (unicode::has_tabs_or_newline(user_input)) [[unlikely]] {
70 tmp_buffer = user_input;
73 helpers::remove_ascii_tab_or_newline(tmp_buffer);
74 url_data = tmp_buffer;
76 url_data = user_input;
81 helpers::trim_c0_whitespace(url_data);
84 std::optional<std::string_view> fragment = helpers::prune_hash(url_data);
93 size_t input_position = 0;
94 const size_t input_size = url_data.size();
99 while (input_position <= input_size) {
100 ada_log(
"In parsing at ", input_position,
" out of ", input_size,
104 ada_log(
"SCHEME_START ", helpers::substring(url_data, input_position));
107 if ((input_position != input_size) &&
119 ada_log(
"SCHEME ", helpers::substring(url_data, input_position));
122 while ((input_position != input_size) &&
123 (unicode::is_alnum_plus(url_data[input_position]))) {
127 if ((input_position != input_size) &&
128 (url_data[input_position] ==
':')) {
129 ada_log(
"SCHEME the scheme should be ",
130 url_data.substr(0, input_position));
131 if constexpr (result_type_is_ada_url) {
132 if (!
url.parse_scheme(url_data.substr(0, input_position))) {
137 if (!
url.parse_scheme_with_colon(
138 url_data.substr(0, input_position + 1))) {
153 base_url->type ==
url.type) {
164 else if (input_position + 1 < input_size &&
165 url_data[input_position + 1] ==
'/') {
187 ada_log(
"NO_SCHEME ", helpers::substring(url_data, input_position));
190 if (base_url ==
nullptr ||
191 (base_url->has_opaque_path && !fragment.has_value())) {
192 ada_log(
"NO_SCHEME validation error");
199 else if (base_url->has_opaque_path && fragment.has_value() &&
200 input_position == input_size) {
201 ada_log(
"NO_SCHEME opaque base with fragment");
202 url.copy_scheme(*base_url);
205 if constexpr (result_type_is_ada_url) {
206 url.path = base_url->path;
207 url.query = base_url->query;
209 url.update_base_pathname(base_url->get_pathname());
210 if (base_url->has_search()) {
214 auto s = base_url->get_search();
215 url.update_base_search(s.empty() ? std::string_view(
"?") : s);
218 url.update_unencoded_base_hash(*fragment);
224 ada_log(
"NO_SCHEME non-file relative path");
229 ada_log(
"NO_SCHEME file base type");
235 ada_log(
"AUTHORITY ", helpers::substring(url_data, input_position));
246 if (url_data.find(
'@', input_position) == std::string_view::npos) {
250 bool at_sign_seen{
false};
251 bool password_token_seen{
false};
258 std::string_view view = url_data.substr(input_position);
261 url.
is_special() ? helpers::find_authority_delimiter_special(view)
262 : helpers::find_authority_delimiter(view);
263 std::string_view authority_view = view.substr(0, location);
264 size_t end_of_authority = input_position + authority_view.size();
266 if ((end_of_authority != input_size) &&
267 (url_data[end_of_authority] ==
'@')) {
270 if (password_token_seen) {
271 if constexpr (result_type_is_ada_url) {
272 url.password +=
"%40";
274 url.append_base_password(
"%40");
277 if constexpr (result_type_is_ada_url) {
278 url.username +=
"%40";
280 url.append_base_username(
"%40");
287 if (!password_token_seen) {
288 size_t password_token_location = authority_view.find(
':');
289 password_token_seen =
290 password_token_location != std::string_view::npos;
292 if constexpr (store_values) {
293 if (!password_token_seen) {
294 if constexpr (result_type_is_ada_url) {
295 url.username += unicode::percent_encode(
299 url.append_base_username(unicode::percent_encode(
304 if constexpr (result_type_is_ada_url) {
305 url.username += unicode::percent_encode(
306 authority_view.substr(0, password_token_location),
308 url.password += unicode::percent_encode(
309 authority_view.substr(password_token_location + 1),
312 url.append_base_username(unicode::percent_encode(
313 authority_view.substr(0, password_token_location),
315 url.append_base_password(unicode::percent_encode(
316 authority_view.substr(password_token_location + 1),
321 }
else if constexpr (store_values) {
322 if constexpr (result_type_is_ada_url) {
323 url.password += unicode::percent_encode(
326 url.append_base_password(unicode::percent_encode(
334 else if (end_of_authority == input_size ||
335 url_data[end_of_authority] ==
'/' ||
336 url_data[end_of_authority] ==
'?' ||
340 if (at_sign_seen && authority_view.empty()) {
347 if (end_of_authority == input_size) {
348 if constexpr (store_values) {
349 if (fragment.has_value()) {
350 url.update_unencoded_base_hash(*fragment);
355 input_position = end_of_authority + 1;
361 ada_log(
"SPECIAL_RELATIVE_OR_AUTHORITY ",
362 helpers::substring(url_data, input_position));
367 if (url_data.substr(input_position, 2) ==
"//") {
379 ada_log(
"PATH_OR_AUTHORITY ",
380 helpers::substring(url_data, input_position));
383 if ((input_position != input_size) &&
384 (url_data[input_position] ==
'/')) {
395 ada_log(
"RELATIVE_SCHEME ",
396 helpers::substring(url_data, input_position));
399 url.copy_scheme(*base_url);
402 if ((input_position != input_size) &&
404 (url_data[input_position] ==
'/')) {
406 "RELATIVE_SCHEME if c is U+002F (/), then set state to relative "
409 }
else if (
url.
is_special() && (input_position != input_size) &&
410 (url_data[input_position] ==
'\\')) {
414 "RELATIVE_SCHEME if url is special and c is U+005C, validation "
415 "error, set state to relative slash state");
418 ada_log(
"RELATIVE_SCHEME otherwise");
423 if constexpr (result_type_is_ada_url) {
424 url.username = base_url->username;
425 url.password = base_url->password;
426 url.host = base_url->host;
427 url.port = base_url->port;
430 url.path = base_url->path;
431 url.query = base_url->query;
433 url.update_base_authority(base_url->get_href(),
434 base_url->get_components());
435 url.update_host_to_base_host(base_url->get_hostname());
436 url.update_base_port(base_url->retrieve_base_port());
439 url.update_base_pathname(base_url->get_pathname());
440 if (base_url->has_search()) {
444 auto s = base_url->get_search();
445 url.update_base_search(s.empty() ? std::string_view(
"?") : s);
453 if ((input_position != input_size) &&
454 (url_data[input_position] ==
'?')) {
458 else if (input_position != input_size) {
461 if constexpr (result_type_is_ada_url) {
463 helpers::shorten_path(
url.path,
url.type);
466 if (helpers::shorten_path(path,
url.type)) {
467 url.update_base_pathname(std::move(std::string(path)));
479 ada_log(
"RELATIVE_SLASH ",
480 helpers::substring(url_data, input_position));
484 (url_data[input_position] ==
'/' ||
485 url_data[input_position] ==
'\\')) {
490 else if ((input_position != input_size) &&
491 (url_data[input_position] ==
'/')) {
501 if constexpr (result_type_is_ada_url) {
502 url.username = base_url->username;
503 url.password = base_url->password;
504 url.host = base_url->host;
505 url.port = base_url->port;
507 url.update_base_authority(base_url->get_href(),
508 base_url->get_components());
509 url.update_host_to_base_host(base_url->get_hostname());
510 url.update_base_port(base_url->retrieve_base_port());
520 ada_log(
"SPECIAL_AUTHORITY_SLASHES ",
521 helpers::substring(url_data, input_position));
526 if (url_data.substr(input_position, 2) ==
"//") {
533 ada_log(
"SPECIAL_AUTHORITY_IGNORE_SLASHES ",
534 helpers::substring(url_data, input_position));
538 while ((input_position != input_size) &&
539 ((url_data[input_position] ==
'/') ||
540 (url_data[input_position] ==
'\\'))) {
548 ada_log(
"QUERY ", helpers::substring(url_data, input_position));
549 if constexpr (store_values) {
552 const uint8_t* query_percent_encode_set =
558 url.update_base_search(url_data.substr(input_position),
559 query_percent_encode_set);
560 ada_log(
"QUERY update_base_search completed ");
561 if (fragment.has_value()) {
562 url.update_unencoded_base_hash(*fragment);
568 ada_log(
"HOST ", helpers::substring(url_data, input_position));
570 std::string_view host_view = url_data.substr(input_position);
571 auto [location, found_colon] =
572 helpers::get_host_delimiter_location(
url.
is_special(), host_view);
573 input_position = (location != std::string_view::npos)
574 ? input_position + location
583 ada_log(
"HOST parsing ", host_view);
584 if (!
url.parse_host(host_view)) {
605 ada_log(
"HOST parsing ", host_view,
" href=",
url.
get_href());
608 if (host_view.empty()) {
609 url.update_base_hostname(
"");
610 }
else if (!
url.parse_host(host_view)) {
623 ada_log(
"OPAQUE_PATH ", helpers::substring(url_data, input_position));
624 std::string_view view = url_data.substr(input_position);
627 size_t location = view.find(
'?');
628 if (location != std::string_view::npos) {
629 view.remove_suffix(view.size() - location);
631 input_position += location + 1;
633 input_position = input_size + 1;
639 if (view.ends_with(
' ')) {
640 std::string modified_view =
641 std::string(view.substr(0, view.size() - 1)) +
"%20";
642 url.update_base_pathname(unicode::percent_encode(
645 url.update_base_pathname(unicode::percent_encode(
651 ada_log(
"PORT ", helpers::substring(url_data, input_position));
652 std::string_view port_view = url_data.substr(input_position);
653 input_position +=
url.parse_port(port_view,
true);
661 ada_log(
"PATH_START ", helpers::substring(url_data, input_position));
670 if (input_position == input_size) {
671 if constexpr (store_values) {
672 url.update_base_pathname(
"/");
673 if (fragment.has_value()) {
674 url.update_unencoded_base_hash(*fragment);
682 if ((url_data[input_position] !=
'/') &&
683 (url_data[input_position] !=
'\\')) {
689 else if ((input_position != input_size) &&
690 (url_data[input_position] ==
'?')) {
694 else if (input_position != input_size) {
699 if (url_data[input_position] !=
'/') {
708 ada_log(
"PATH ", helpers::substring(url_data, input_position));
709 std::string_view view = url_data.substr(input_position);
713 size_t locofquestionmark = view.find(
'?');
714 if (locofquestionmark != std::string_view::npos) {
716 view.remove_suffix(view.size() - locofquestionmark);
717 input_position += locofquestionmark + 1;
719 input_position = input_size + 1;
721 if constexpr (store_values) {
722 if constexpr (result_type_is_ada_url) {
723 helpers::parse_prepared_path(view,
url.type,
url.path);
725 url.consume_prepared_path(view);
732 ada_log(
"FILE_SLASH ", helpers::substring(url_data, input_position));
735 if ((input_position != input_size) &&
736 (url_data[input_position] ==
'/' ||
737 url_data[input_position] ==
'\\')) {
738 ada_log(
"FILE_SLASH c is U+002F or U+005C");
743 ada_log(
"FILE_SLASH otherwise");
749 if constexpr (result_type_is_ada_url) {
750 url.host = base_url->host;
752 url.update_host_to_base_host(base_url->get_host());
758 if (!base_url->get_pathname().empty()) {
760 url_data.substr(input_position))) {
761 std::string_view first_base_url_path =
762 base_url->get_pathname().substr(1);
763 size_t loc = first_base_url_path.find(
'/');
764 if (loc != std::string_view::npos) {
765 helpers::resize(first_base_url_path, loc);
768 first_base_url_path)) {
769 if constexpr (result_type_is_ada_url) {
771 url.path += first_base_url_path;
773 url.append_base_pathname(
774 helpers::concat(
"/", first_base_url_path));
788 ada_log(
"FILE_HOST ", helpers::substring(url_data, input_position));
789 std::string_view view = url_data.substr(input_position);
791 size_t location = view.find_first_of(
"/\\?");
792 std::string_view file_host_buffer(
794 (location != std::string_view::npos) ? location : view.size());
798 }
else if (file_host_buffer.empty()) {
800 if constexpr (result_type_is_ada_url) {
803 url.update_base_hostname(
"");
808 size_t consumed_bytes = file_host_buffer.size();
809 input_position += consumed_bytes;
812 if (!
url.parse_host(file_host_buffer)) {
816 if constexpr (result_type_is_ada_url) {
818 if (
url.host.has_value() &&
url.host.value() ==
"localhost") {
823 url.update_base_hostname(
"");
834 ada_log(
"FILE ", helpers::substring(url_data, input_position));
835 std::string_view file_view = url_data.substr(input_position);
837 url.set_protocol_as_file();
838 if constexpr (result_type_is_ada_url) {
842 url.update_base_hostname(
"");
845 if (input_position != input_size &&
846 (url_data[input_position] ==
'/' ||
847 url_data[input_position] ==
'\\')) {
848 ada_log(
"FILE c is U+002F or U+005C");
856 ada_log(
"FILE base non-null");
857 if constexpr (result_type_is_ada_url) {
858 url.host = base_url->host;
859 url.path = base_url->path;
860 url.query = base_url->query;
862 url.update_host_to_base_host(base_url->get_hostname());
863 url.update_base_pathname(base_url->get_pathname());
864 if (base_url->has_search()) {
868 auto s = base_url->get_search();
869 url.update_base_search(s.empty() ? std::string_view(
"?") : s);
876 if (input_position != input_size && url_data[input_position] ==
'?') {
880 else if (input_position != input_size) {
886 if constexpr (result_type_is_ada_url) {
887 helpers::shorten_path(
url.path,
url.type);
890 if (helpers::shorten_path(path,
url.type)) {
891 url.update_base_pathname(std::move(std::string(path)));
898 url.clear_pathname();
909 ada_log(
"FILE go to path");
921 if constexpr (store_values) {
922 if (fragment.has_value()) {
923 url.update_unencoded_base_hash(*fragment);