summaryrefslogtreecommitdiff
path: root/usr.bin/ftp
diff options
context:
space:
mode:
author: Klemens Nanni <kn@cvs.openbsd.org> 2021-11-06 14:27:46 +0000
committer: Klemens Nanni <kn@cvs.openbsd.org> 2021-11-06 14:27:46 +0000
commit: c8032a645b499d4e5856bc996dd74d6f2298b997 (patch)
tree: 34b89666d4a1fb1ce3ccfb5631624c0dd0613c1b /usr.bin/ftp
parent: 8536bd8daee944e2a5861d0e14c418090384e2a7 (diff)
Stop URL encoding the tilde character
RFC 1738 "Uniform Resource Locators (URL)" lists tilde as an unsafe character. RFC 2396 "Uniform Resource Identifiers (URI): Generic Syntax" updates it to:

    The tilde "~" character was added to those in the "unreserved" set, since it is extensively used on the Internet in spite of the difficulty to transcribe it with some keyboards.

In theory, this shouldn't make a difference, but some servers do not decode "%7e" and thus erroneously serve a 404.

RFC 2396, section 2.4.2 "When to Escape and Unescape", says:

    In some cases, data that could be represented by an unreserved character may appear escaped; for example, some of the unreserved "mark" characters are automatically escaped by some systems. If the given URI scheme defines a canonicalization algorithm, then unreserved characters may be unescaped according to that algorithm. For example, "%7e" is sometimes used instead of "~" in an http URL path, but the two are equivalent for an http URL.

Update ftp(1) to RFC 2396 by no longer treating "~" as an unsafe character. This is effectively a one-character diff; update comments accordingly as well as the order of characters to ease code-to-standard comparison.

This matches curl(1) and wget(1) behaviour wrt. encoding of "~".

OK sthen
Diffstat (limited to 'usr.bin/ftp')
-rw-r--r-- usr.bin/ftp/fetch.c 29
1 files changed, 15 insertions, 14 deletions
diff --git a/usr.bin/ftp/fetch.c b/usr.bin/ftp/fetch.c
index c83ee02ae36..cfc68b08b02 100644
--- a/usr.bin/ftp/fetch.c
+++ b/usr.bin/ftp/fetch.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: fetch.c,v 1.205 2021/08/31 09:51:25 claudio Exp $ */
+/* $OpenBSD: fetch.c,v 1.206 2021/11/06 14:27:45 kn Exp $ */
/* $NetBSD: fetch.c,v 1.14 1997/08/18 10:20:20 lukem Exp $ */
/*-
@@ -106,14 +106,17 @@ static int redirect_loop;
static int retried;
/*
- * Determine whether the character needs encoding, per RFC1738:
- * - No corresponding graphic US-ASCII.
- * - Unsafe characters.
+ * Determine whether the character needs encoding, per RFC2396.
*/
static int
-unsafe_char(const char *c0)
+to_encode(const char *c0)
{
- const char *unsafe_chars = " <>\"#{}|\\^~[]`";
+ /* 2.4.3. Excluded US-ASCII Characters */
+ const char *excluded_chars =
+ " " /* space */
+ "<>#\"" /* delims (modulo "%", see below) */
+ "{}|\\^[]`" /* unwise */
+ ;
const unsigned char *c = (const unsigned char *)c0;
/*
@@ -123,16 +126,15 @@ unsafe_char(const char *c0)
return (iscntrl(*c) || !isascii(*c) ||
/*
- * Unsafe characters.
- * '%' is also unsafe, if is not followed by two
+ * '%' is also reserved, if is not followed by two
* hexadecimal digits.
*/
- strchr(unsafe_chars, *c) != NULL ||
+ strchr(excluded_chars, *c) != NULL ||
(*c == '%' && (!isxdigit(c[1]) || !isxdigit(c[2]))));
}
/*
- * Encode given URL, per RFC1738.
+ * Encode given URL, per RFC2396.
* Allocate and return string to the caller.
*/
static char *
@@ -145,11 +147,10 @@ url_encode(const char *path)
/*
* First pass:
- * Count unsafe characters, and determine length of the
- * final URL.
+ * Count characters to encode and determine length of the final URL.
*/
for (i = 0; i < length; i++)
- if (unsafe_char(path + i))
+ if (to_encode(path + i))
new_length += 2;
epath = epathp = malloc(new_length + 1); /* One more for '\0'. */
@@ -161,7 +162,7 @@ url_encode(const char *path)
* Encode, and copy final URL.
*/
for (i = 0; i < length; i++)
- if (unsafe_char(path + i)) {
+ if (to_encode(path + i)) {
snprintf(epathp, 4, "%%" "%02x",
(unsigned char)path[i]);
epathp += 3;