Uri.cpp 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158
  1. /*
  2. * Copyright 2013-present Facebook, Inc.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include <folly/Uri.h>
  17. #include <algorithm>
  18. #include <cctype>
  19. #include <boost/regex.hpp>
  20. namespace folly {
  21. namespace {
  22. std::string submatch(const boost::cmatch& m, int idx) {
  23. const auto& sub = m[idx];
  24. return std::string(sub.first, sub.second);
  25. }
  26. } // namespace
  27. Uri::Uri(StringPiece str) : hasAuthority_(false), port_(0) {
  28. static const boost::regex uriRegex(
  29. "([a-zA-Z][a-zA-Z0-9+.-]*):" // scheme:
  30. "([^?#]*)" // authority and path
  31. "(?:\\?([^#]*))?" // ?query
  32. "(?:#(.*))?"); // #fragment
  33. static const boost::regex authorityAndPathRegex("//([^/]*)(/.*)?");
  34. boost::cmatch match;
  35. if (UNLIKELY(!boost::regex_match(str.begin(), str.end(), match, uriRegex))) {
  36. throw std::invalid_argument(to<std::string>("invalid URI ", str));
  37. }
  38. scheme_ = submatch(match, 1);
  39. std::transform(scheme_.begin(), scheme_.end(), scheme_.begin(), ::tolower);
  40. StringPiece authorityAndPath(match[2].first, match[2].second);
  41. boost::cmatch authorityAndPathMatch;
  42. if (!boost::regex_match(
  43. authorityAndPath.begin(),
  44. authorityAndPath.end(),
  45. authorityAndPathMatch,
  46. authorityAndPathRegex)) {
  47. // Does not start with //, doesn't have authority
  48. hasAuthority_ = false;
  49. path_ = authorityAndPath.str();
  50. } else {
  51. static const boost::regex authorityRegex(
  52. "(?:([^@:]*)(?::([^@]*))?@)?" // username, password
  53. "(\\[[^\\]]*\\]|[^\\[:]*)" // host (IP-literal (e.g. '['+IPv6+']',
  54. // dotted-IPv4, or named host)
  55. "(?::(\\d*))?"); // port
  56. const auto authority = authorityAndPathMatch[1];
  57. boost::cmatch authorityMatch;
  58. if (!boost::regex_match(
  59. authority.first,
  60. authority.second,
  61. authorityMatch,
  62. authorityRegex)) {
  63. throw std::invalid_argument(to<std::string>(
  64. "invalid URI authority ",
  65. StringPiece(authority.first, authority.second)));
  66. }
  67. StringPiece port(authorityMatch[4].first, authorityMatch[4].second);
  68. if (!port.empty()) {
  69. port_ = to<uint16_t>(port);
  70. }
  71. hasAuthority_ = true;
  72. username_ = submatch(authorityMatch, 1);
  73. password_ = submatch(authorityMatch, 2);
  74. host_ = submatch(authorityMatch, 3);
  75. path_ = submatch(authorityAndPathMatch, 2);
  76. }
  77. query_ = submatch(match, 3);
  78. fragment_ = submatch(match, 4);
  79. }
  80. std::string Uri::authority() const {
  81. std::string result;
  82. // Port is 5 characters max and we have up to 3 delimiters.
  83. result.reserve(host().size() + username().size() + password().size() + 8);
  84. if (!username().empty() || !password().empty()) {
  85. result.append(username());
  86. if (!password().empty()) {
  87. result.push_back(':');
  88. result.append(password());
  89. }
  90. result.push_back('@');
  91. }
  92. result.append(host());
  93. if (port() != 0) {
  94. result.push_back(':');
  95. toAppend(port(), &result);
  96. }
  97. return result;
  98. }
  99. std::string Uri::hostname() const {
  100. if (host_.size() > 0 && host_[0] == '[') {
  101. // If it starts with '[', then it should end with ']', this is ensured by
  102. // regex
  103. return host_.substr(1, host_.size() - 2);
  104. }
  105. return host_;
  106. }
  107. const std::vector<std::pair<std::string, std::string>>& Uri::getQueryParams() {
  108. if (!query_.empty() && queryParams_.empty()) {
  109. // Parse query string
  110. static const boost::regex queryParamRegex(
  111. "(^|&)" /*start of query or start of parameter "&"*/
  112. "([^=&]*)=?" /*parameter name and "=" if value is expected*/
  113. "([^=&]*)" /*parameter value*/
  114. "(?=(&|$))" /*forward reference, next should be end of query or
  115. start of next parameter*/);
  116. const boost::cregex_iterator paramBeginItr(
  117. query_.data(), query_.data() + query_.size(), queryParamRegex);
  118. boost::cregex_iterator paramEndItr;
  119. for (auto itr = paramBeginItr; itr != paramEndItr; ++itr) {
  120. if (itr->length(2) == 0) {
  121. // key is empty, ignore it
  122. continue;
  123. }
  124. queryParams_.emplace_back(
  125. std::string((*itr)[2].first, (*itr)[2].second), // parameter name
  126. std::string((*itr)[3].first, (*itr)[3].second) // parameter value
  127. );
  128. }
  129. }
  130. return queryParams_;
  131. }
  132. } // namespace folly