summary | refs | log | tree | commit | diff
path: root/pith/url.c
diff options
context:
space:
mode:
author: Eduardo Chappa <chappa@washington.edu> 2013-07-18 20:58:04 -0600
committer: Eduardo Chappa <chappa@washington.edu> 2013-07-18 20:58:04 -0600
commit: 3463dc142b787d8010cf9310caa7915f30a9e275 (patch)
tree: a11c9653117456b588bece8520877834f9bd519a /pith/url.c
parent: 6f187653ec9cc2122670cd67d01bf8394dc62fe2 (diff)
download: alpine-3463dc142b787d8010cf9310caa7915f30a9e275.tar.xz
* Extended support for recognition of UTF-8 in urls, according to information
from http://url.spec.whatwg.org
Diffstat (limited to 'pith/url.c')
-rw-r--r-- pith/url.c 41
1 file changed, 39 insertions, 2 deletions
diff --git a/pith/url.c b/pith/url.c
index 173cb879..1798320d 100644
--- a/pith/url.c
+++ b/pith/url.c
@@ -248,11 +248,48 @@ rfc1738_str(char *s)
int
rfc1738uchar(char *s)
{
- return((RFC1738_ESC(s)) /* "escape" */
+ int valid = (RFC1738_ESC(s)) /* "escape" */
? 2
: (isalnum((unsigned char) *s) /* alphanumeric */
|| strchr(RFC1738_SAFE, *s) /* other special stuff */
- || strchr(RFC1738_EXTRA, *s)));
+ || strchr(RFC1738_EXTRA, *s));
+
+ if(!valid){
+ char *t;
+ UCS ucs;
+ CBUF_S cbuf;
+
+ cbuf.cbuf[0] = '\0';
+ cbuf.cbufp = cbuf.cbuf;
+ cbuf.cbufend = cbuf.cbuf;
+
+ for(t = s; t && *t; t++){
+ if(utf8_to_ucs4_oneatatime((unsigned char) *t & 0xff, &cbuf, &ucs, NULL)){
+ if ((ucs >= 0x00A0 && ucs <= 0xD7FF)
+ || (ucs >= 0xE000 && ucs <= 0xFDCF)
+ || (ucs >= 0xFDF0 && ucs <= 0xFFEF)
+ || (ucs >= 0x10000 && ucs <= 0x1FFFD)
+ || (ucs >= 0x20000 && ucs <= 0x2FFFD)
+ || (ucs >= 0x30000 && ucs <= 0x3FFFD)
+ || (ucs >= 0x40000 && ucs <= 0x4FFFD)
+ || (ucs >= 0x50000 && ucs <= 0x5FFFD)
+ || (ucs >= 0x60000 && ucs <= 0x6FFFD)
+ || (ucs >= 0x70000 && ucs <= 0x7FFFD)
+ || (ucs >= 0x80000 && ucs <= 0x8FFFD)
+ || (ucs >= 0x90000 && ucs <= 0x9FFFD)
+ || (ucs >= 0xA0000 && ucs <= 0xAFFFD)
+ || (ucs >= 0xB0000 && ucs <= 0xBFFFD)
+ || (ucs >= 0xC0000 && ucs <= 0xCFFFD)
+ || (ucs >= 0xD0000 && ucs <= 0xDFFFD)
+ || (ucs >= 0xE0000 && ucs <= 0xEFFFD)
+ || (ucs >= 0xF0000 && ucs <= 0xFFFFD)
+ || (ucs >= 0x100000 && ucs <= 0x10FFFD))
+ valid = t-s+1;
+ break;
+ }
+ }
+ }
+ return valid;
}