namespace Cobra.Core

    class UrlUtils
        """
        Provides simple URL-related utilities like .encode and .decode without having to
        link to a large web-related library like System.Web.

        Both operations work on UTF-8 bytes: .encode writes every byte that is not left
        as-is as a lowercase '%xx' escape (and a space as '+'); .decode reverses that.
        """

        test
            # Pairs of [plain, expected result of .encode(plain)].
            urls = [
                ['http://alpha.com/bravo charlie', 'http%3a%2f%2falpha.com%2fbravo+charlie'],
                ['http://alpha.com/bravo?foo="charlie"', 'http%3a%2f%2falpha.com%2fbravo%3ffoo%3d%22charlie%22'],
                # ['beta.com/β', 'beta.com%2f%ce%b2'],
                # ['foo.com/hey—now', 'foo.com%2fhey%e2%80%94now'],
                ['bar.com/%', 'bar.com%2f%25'],
                [nil, ''],
            ]
            # Pairs of [encoded, expected result of .decode(encoded)].
            encoded = [
                ['http%3a%2f%2falpha.com%2fbravo+charlie', 'http://alpha.com/bravo charlie'],
                ['alpha.com%2F%3Femail%3Dfoo%40bar.com', 'alpha.com/?email=foo@bar.com'],
                # ['gamma.com%2f%ce%b3', 'gamma.com/γ'],
                [nil, ''],
            ]
            for url, encodedUrl in urls
                assert .encode(url) == encodedUrl
            for encodedUrl, decodedUrl in encoded
                assert .decode(encodedUrl) == decodedUrl
            # Round trip: decoding an encoded URL gives back the original.
            for url, encodedUrl in urls
                if url, assert .decode(.encode(url)) == url
            # A literal (unescaped) non-ASCII character must survive decoding unchanged.
            # The character is built from its code point to keep this file ASCII-safe.
            eAcute = (0xe9 to char).toString
            assert .decode('caf' + eAcute + '+ole') == 'caf' + eAcute + ' ole'
            assert .decode('caf%c3%a9') == 'caf' + eAcute

        shared

            def decode(url as String?) as String
                """
                Returns the URL-decoded form of `url`.

                - nil yields the empty string.
                - Input containing neither '%' nor '+' is returned unchanged.
                - '+' becomes a space.
                - '%xx' (two hex digits, either case) becomes the byte 0xXX.
                - A '%' that is not followed by two hex digits is kept literally.
                The collected bytes are finally interpreted as UTF-8.
                """
                if url is nil, return ''
                hasEncoding = url.indexOf('%') >= 0 or url.indexOf('+') >= 0
                if not hasEncoding, return url to !
                len = url.length
                enc = Encoding.utf8
                bytes = List<of uint8>(len)
                xchar as int
                for i in len
                    ch = url[i]
                    # `i + 2 < len` guarantees both hex digits exist; '%%' is left for the
                    # literal path so the second '%' can start an escape of its own.
                    if ch == c'%' and i + 2 < len and url[i + 1] <> c'%'
                        if (xchar = _getChar(url to !, i + 1, 2)) <> -1
                            # An escape is one raw byte of the encoded text, so append it
                            # directly; it must NOT be re-encoded as a character.
                            bytes.add(xchar to uint8)
                            i += 2  # skip the two hex digits just consumed
                        else, _writeCharBytes(bytes, c'%', enc)
                        continue
                    if ch == c'+', _writeCharBytes(bytes, c' ', enc)
                    else, _writeCharBytes(bytes, ch, enc)
                buf = bytes.toArray
                return enc.getString(buf) to !

            def encode(url as String?) as String
                """
                Returns the URL-encoded form of `url`.

                - nil and the empty string yield the empty string.
                - ASCII letters, digits and the characters ! ( ) * - . _ ' are left as-is.
                - A space becomes '+'.
                - Everything else is converted to UTF-8 and each byte is written as a
                  lowercase '%xx' escape.
                If no character needs escaping, `url` itself is returned.
                """
                if url is nil, return ''
                if url.length == 0, return ''
                enc = Encoding.utf8
                needEncode = false
                len = url.length
                # Cheap pre-scan: stop at the first character that requires escaping.
                for i in len
                    ch = url[i]
                    if ch < c'0' or (ch < c'A' and ch > c'9') or (ch > c'Z' and ch < c'a') or ch > c'z'
                        if _notEncoded(ch), continue
                        needEncode = true
                        break
                if not needEncode, return url to !
                bytes = uint8[](enc.getMaxByteCount(len))
                realLen = enc.getBytes(url, 0, len, bytes, 0)
                # The escaped output is pure ASCII, so ASCII decoding is lossless here.
                return Encoding.ascii.getString(_urlEncodeToBytes(bytes, 0, realLen)) to !

            # Lowercase hex digits, indexed by nibble value, used when writing '%xx'.
            var _hexChars = '0123456789abcdef'.toCharArray

            def _getInt(b as uint8) as int
                """
                Returns the value (0-15) of the hex digit whose character code is `b`,
                accepting both cases, or -1 if `b` is not a hex digit.
                """
                ch = b to char
                if ch >= c'0' and ch <= c'9'
                    return ch to int - c'0' to int
                if ch >= c'a' and ch <= c'f'
                    return ch to int - c'a' to int + 10
                if ch >= c'A' and ch <= c'F'
                    return ch to int - c'A' to int + 10
                return -1

            def _getChar(bytes as uint8[], offset as int, length as int) as int
                """
                Parses `length` hex digits from `bytes` starting at `offset` and returns
                their combined value, or -1 if any of them is not a hex digit.
                """
                val = 0
                for i in offset : length + offset
                    cur = _getInt(bytes[i])
                    if cur == -1, return -1
                    val = (val << 4) + cur
                return val

            def _getChar(str as String, offset as int, length as int) as int
                """
                Parses `length` hex digits from `str` starting at `offset` and returns
                their combined value, or -1 if any character is non-ASCII or not a hex
                digit.
                """
                val = 0
                for i in offset : length + offset
                    ch = str[i]
                    # Non-ASCII characters cannot be hex digits; reject them before the
                    # narrowing cast below.
                    if ch to int > 127, return -1
                    cur = _getInt(ch to uint8)
                    if cur == -1, return -1
                    val = (val << 4) + cur
                return val

            def _writeCharBytes(buf as IList, ch as char, enc as Encoding?)
                """
                Appends the bytes of the literal character `ch` to `buf`.

                ASCII characters are appended as a single byte. Any other character is
                converted with `enc`, so that it is still valid when the buffer is later
                decoded with the same encoding.
                """
                if ch to int > 127
                    for b in enc.getBytes(@[ch])
                        buf.add(sharp'b')
                else
                    buf.add(sharp'(byte)ch')

            def _urlEncodeToBytes(bytes as uint8[]?, offset as int, count as int) as uint8[]?
                """
                URL-encodes `count` bytes of `bytes` starting at `offset` and returns the
                escaped bytes. Returns nil for nil input and an empty array for an empty
                input array.
                """
                if not bytes, return nil
                len = bytes.length
                if not len, return uint8[](0)
                # `count` is only the initial capacity; the stream grows as escapes expand.
                result = MemoryStream(count)
                for i in offset : offset + count
                    _urlEncodeChar(bytes[i] to char, result)
                return result.toArray

            def _notEncoded(ch as char) as bool
                """
                Returns true if `ch` is one of the punctuation characters that are written
                unescaped: ! ( ) * - . _ '
                """
                return ch in [c'!', c'(', c')', c'*', c'-', c'.', c'_', c"'"]

            def _urlEncodeChar(ch as char, result as Stream)
                """
                Writes the URL-encoded form of the single byte-sized character `ch` to
                `result`: unchanged for letters, digits and the unescaped punctuation,
                '+' for a space, and a lowercase '%xx' escape for anything else.
                """
                if ch > c' ' and _notEncoded(ch)
                    result.writeByte(ch to uint8)
                    return
                if ch == c' '
                    result.writeByte(c'+' to uint8)
                    return
                if ch < c'0' or (ch < c'A' and ch > c'9') or (ch > c'Z' and ch < c'a') or ch > c'z'
                    result.writeByte(c'%' to uint8)
                    # High nibble first, then low nibble.
                    codepoint = (ch to int) >> 4
                    result.writeByte(_hexChars[codepoint] to uint8)
                    codepoint = (ch to int) & 0x0f
                    result.writeByte(_hexChars[codepoint] to uint8)
                else, result.writeByte(ch to uint8)