summaryrefslogtreecommitdiff
path: root/vendor/ezyang/htmlpurifier/library/HTMLPurifier/URIParser.php
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/ezyang/htmlpurifier/library/HTMLPurifier/URIParser.php')
-rw-r--r--vendor/ezyang/htmlpurifier/library/HTMLPurifier/URIParser.php71
1 files changed, 71 insertions, 0 deletions
diff --git a/vendor/ezyang/htmlpurifier/library/HTMLPurifier/URIParser.php b/vendor/ezyang/htmlpurifier/library/HTMLPurifier/URIParser.php
new file mode 100644
index 0000000..0e7381a
--- /dev/null
+++ b/vendor/ezyang/htmlpurifier/library/HTMLPurifier/URIParser.php
@@ -0,0 +1,71 @@
+<?php
+
+/**
+ * Parses a URI into the components and fragment identifier as specified
+ * by RFC 3986.
+ */
+class HTMLPurifier_URIParser
+{
+
+ /**
+ * Instance of HTMLPurifier_PercentEncoder to do normalization with.
+ */
+ protected $percentEncoder;
+
+ public function __construct()
+ {
+ $this->percentEncoder = new HTMLPurifier_PercentEncoder();
+ }
+
+ /**
+ * Parses a URI.
+ * @param string $uri URI to parse
+ * @return HTMLPurifier_URI representation of URI. This representation has
+ * not been validated yet and may not conform to RFC.
+ */
+ public function parse($uri)
+ {
+ $uri = $this->percentEncoder->normalize($uri);
+
+ // Regexp is as per Appendix B.
+ // Note that ["<>] are an addition to the RFC's recommended
+ // characters, because they represent external delimiters.
+ $r_URI = '!'.
+ '(([a-zA-Z0-9\.\+\-]+):)?'. // 2. Scheme
+ '(//([^/?#"<>]*))?'. // 4. Authority
+ '([^?#"<>]*)'. // 5. Path
+ '(\?([^#"<>]*))?'. // 7. Query
+ '(#([^"<>]*))?'. // 8. Fragment
+ '!';
+
+ $matches = array();
+ $result = preg_match($r_URI, $uri, $matches);
+
+ if (!$result) return false; // *really* invalid URI
+
+ // separate out parts
+ $scheme = !empty($matches[1]) ? $matches[2] : null;
+ $authority = !empty($matches[3]) ? $matches[4] : null;
+ $path = $matches[5]; // always present, can be empty
+ $query = !empty($matches[6]) ? $matches[7] : null;
+ $fragment = !empty($matches[8]) ? $matches[9] : null;
+
+ // further parse authority
+ if ($authority !== null) {
+ $r_authority = "/^((.+?)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/";
+ $matches = array();
+ preg_match($r_authority, $authority, $matches);
+ $userinfo = !empty($matches[1]) ? $matches[2] : null;
+ $host = !empty($matches[3]) ? $matches[3] : '';
+ $port = !empty($matches[4]) ? (int) $matches[5] : null;
+ } else {
+ $port = $host = $userinfo = null;
+ }
+
+ return new HTMLPurifier_URI(
+ $scheme, $userinfo, $host, $port, $path, $query, $fragment);
+ }
+
+}
+
+// vim: et sw=4 sts=4