aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergey Poznyakoff <gray@gnu.org>2021-03-11 16:37:16 +0200
committerSergey Poznyakoff <gray@gnu.org>2021-03-11 17:06:50 +0200
commit94c610befbf434c892f1f5fe038baf3570f462bb (patch)
tree598961917e9792541e90e9b8c100db0b3c353e14
parent1ed04dfdee21462c2b080e7913f3153122964c7c (diff)
downloadhaproxy-bulkredirect-94c610befbf434c892f1f5fe038baf3570f462bb.tar.gz
haproxy-bulkredirect-94c610befbf434c892f1f5fe038baf3570f462bb.tar.bz2
Implement URL percent encoding
* bulkredirect.lua (urlencode): New global. (bulkredirect.request): Convert hex digits in %XX fragments of the path to upper case. Make sure dt is a table before addressing its elements. (parseopt): New flag urlencode. (load_redirect_file): Initialize domopt from global settings. Percent-encode the path components if urlencode is set. Otherwise, make sure all %XX are in upper case. * t/testsuite: Test URL encoding. * README: Update.
-rw-r--r--README48
-rw-r--r--bulkredirect.lua87
-rwxr-xr-xt/testsuite21
3 files changed, 131 insertions, 25 deletions
diff --git a/README b/README
index be36aff..87da0e6 100644
--- a/README
+++ b/README
@@ -59,6 +59,23 @@ ignored. Empty lines are ignored as well.
There are three kinds of statements: option definition, domain
declaration, and redirection rule.
+** Domain declaration
+
+This statement declares a domain for which the redirection rules below
+apply. Syntactically, it is:
+
+ [DOMAIN]
+
+where DOMAIN is the domain name. No whitespace is allowed between the
+name and square brackets.
+
+Each domain declaration remains in effect until another domain
+declaration is encountered in the redirection table.
+
+Special name "*" means "any domain name".
+
+A redirection table file must declare at least one domain.
+
** Option definition
Option definition begins with the word 'option' followed by one or
@@ -99,34 +116,27 @@ Discard any query string attached to the incoming URI.
Creates a temporary redirect (HTTP response code 302). By default,
permanent redirects (301) are created.
+*** urlencode
+
+Encode special characters in path parts of both source and destination
+URLs as specified in RFC 3986 ("percent encoding"). By default,
+bulkredirect assumes all URLs are already properly encoded.
+
Each of these option names can be prefixed with 'no' to revert its
meaning.
By default all options are unset.
-The options defined in this statement remain in effect until changed
-by another 'option' statement below. They also can be overridden for
-each redirect individually. See the discussion of redirection rules
-below.
-
Here is an example of the 'option' statement:
option www,stripquery
-** Domain declaration
-
-This statement declares a domain for which the redirection rules below
-apply. Syntactically, it is:
-
- [DOMAIN]
-
-where DOMAIN is the domain name. No whitespace is allowed between the
-name and square brackets.
-
-Each domain declaration remains in effect until another domain
-declaration is encountered in the redirection table.
-
-Special name "*" means "any domain name".
+The 'option' statement that appears before the first domain
+declaration sets the default options for each domain declared in the
+file. These defaults can be changed for each domain individually,
+by placing an 'option' statement after declaring that domain. Options
+set by such a statement remain in effect for each redirect that appears
+after it, until overridden by another 'option' statement.
** Redirection rule
diff --git a/bulkredirect.lua b/bulkredirect.lua
index 211d335..514a63b 100644
--- a/bulkredirect.lua
+++ b/bulkredirect.lua
@@ -35,6 +35,56 @@ local function prevsegm (t)
end
end
+-- Translation table for percent encoding.
+-- Entry N holds the replacement string for the byte with value N
+-- (1 through 255).  ASCII letters, digits, '-', '.', '_', '~' and '/'
+-- map to themselves; every other byte maps to its '%XX' form, written
+-- with upper-case hex digits.  There is no entry for byte 0.
+local urltrans = {
+ '%01', '%02', '%03', '%04', '%05', '%06', '%07', '%08',
+ '%09', '%0A', '%0B', '%0C', '%0D', '%0E', '%0F', '%10',
+ '%11', '%12', '%13', '%14', '%15', '%16', '%17', '%18',
+ '%19', '%1A', '%1B', '%1C', '%1D', '%1E', '%1F', '%20',
+ '%21', '%22', '%23', '%24', '%25', '%26', '%27', '%28',
+ '%29', '%2A', '%2B', '%2C', '-', '.', '/', '0',
+ '1', '2', '3', '4', '5', '6', '7', '8',
+ '9', '%3A', '%3B', '%3C', '%3D', '%3E', '%3F', '%40',
+ 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
+ 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
+ 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
+ 'Y', 'Z', '%5B', '%5C', '%5D', '%5E', '_', '%60',
+ 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h',
+ 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p',
+ 'q', 'r', 's', 't', 'u', 'v', 'w', 'x',
+ 'y', 'z', '%7B', '%7C', '%7D', '~', '%7F', '%80',
+ '%81', '%82', '%83', '%84', '%85', '%86', '%87', '%88',
+ '%89', '%8A', '%8B', '%8C', '%8D', '%8E', '%8F', '%90',
+ '%91', '%92', '%93', '%94', '%95', '%96', '%97', '%98',
+ '%99', '%9A', '%9B', '%9C', '%9D', '%9E', '%9F', '%A0',
+ '%A1', '%A2', '%A3', '%A4', '%A5', '%A6', '%A7', '%A8',
+ '%A9', '%AA', '%AB', '%AC', '%AD', '%AE', '%AF', '%B0',
+ '%B1', '%B2', '%B3', '%B4', '%B5', '%B6', '%B7', '%B8',
+ '%B9', '%BA', '%BB', '%BC', '%BD', '%BE', '%BF', '%C0',
+ '%C1', '%C2', '%C3', '%C4', '%C5', '%C6', '%C7', '%C8',
+ '%C9', '%CA', '%CB', '%CC', '%CD', '%CE', '%CF', '%D0',
+ '%D1', '%D2', '%D3', '%D4', '%D5', '%D6', '%D7', '%D8',
+ '%D9', '%DA', '%DB', '%DC', '%DD', '%DE', '%DF', '%E0',
+ '%E1', '%E2', '%E3', '%E4', '%E5', '%E6', '%E7', '%E8',
+ '%E9', '%EA', '%EB', '%EC', '%ED', '%EE', '%EF', '%F0',
+ '%F1', '%F2', '%F3', '%F4', '%F5', '%F6', '%F7', '%F8',
+ '%F9', '%FA', '%FB', '%FC', '%FD', '%FE', '%FF',
+}
+
+-- Apply percent encoding to the path part of S.
+--
+-- If S contains a '?' that is both preceded and followed by at least one
+-- character, everything from the last such '?' onward is taken to be the
+-- query and is copied to the result verbatim; only the part before it is
+-- encoded.  Otherwise the whole of S is encoded.  Bytes are translated
+-- through the urltrans table.
+--
+-- Returns the encoded string.
+local function url_path_encode (s)
+    local t = {}
+    -- '?' is a pattern quantifier, so escape it to match it literally.
+    local p, q = s:match('^(.+)(%?.+)')
+    if p then
+        -- The query lands right after the last encoded path byte.
+        t[#p+1] = q
+        s = p
+    end
+    for i = 1, #s do
+        local b = s:byte(i)
+        -- urltrans has no slot for the NUL byte; encode it explicitly so
+        -- that table.concat never runs into a hole.
+        t[i] = urltrans[b] or string.format('%%%02X', b)
+    end
+    return table.concat(t)
+end
+
--
-- Module global variables:
--
@@ -110,13 +160,19 @@ stripquery = false
temporary = false
-- Whether to return temporary (302) or permanent (301) reply.
+urlencode = true
+--[[ Encode special characters in path parts of both source and destination
+ URLs as specified in RFC 3986 ("percent encoding"). By default,
+ bulkredirect assumes all URLs are already properly encoded.
+ NOTE(review): the initial value above is true, which appears to contradict
+ the default described here -- confirm which one is intended.
+]]
+
--
-- Redirect the request if it matches one of the entries in the RT table.
--
function bulkredirect.request (txn)
local headers = txn.http:req_get_headers()
local reply = txn:reply()
- local path = txn.f:path():sub(2)
+ local path = txn.f:path():sub(2):gsub('%%%x%x', string.upper)
local host = headers["host"][0]
-- Get the per-host redirection table
@@ -157,6 +213,10 @@ function bulkredirect.request (txn)
dt = dt[1]
end
+ if type(dt) == 'string' then
+ dt = { dt }
+ end
+
location = dt[1]
if dt[2] ~= nil then
exact = dt[2]
@@ -174,13 +234,13 @@ function bulkredirect.request (txn)
location = rthost[i]
end
end
-
+
if location then
if not (location:match('^http://') or location:match('^https://')
or location:match('^/')) then
location = '/'..location
end
-
+
if not exact or i == path or i..'/' == path then
if not strippath then
location = location .. path:sub(i:len() + 1)
@@ -225,7 +285,8 @@ local function parseopt (s, t, loc)
['exact'] = true,
['strippath'] = true,
['stripquery'] = true,
- ['temporary'] = true
+ ['temporary'] = true,
+ ['urlencode'] = true
}
function options (str, loc)
@@ -276,6 +337,7 @@ local function set_dst (dt, src, dst)
elseif type(dt[path]) == 'string' then
dt[path] = { { dt[path] }, {} }
end
+-- core.Info(require('inspect')(dt[path]))
dt[path][2][query] = dst
elseif type(dt[src]) == 'table' then
dt[src][1] = dst
@@ -333,7 +395,7 @@ local function load_redirect_file (f, filename)
local ln = 1
local rt = {}
- local domopt = {}
+ local domopt
local parsetab = {
{ '^#', function () end },
@@ -353,6 +415,12 @@ local function load_redirect_file (f, filename)
function (s)
domain = s
if not rt[domain] then rt[domain] = {} end
+ domopt = {
+ exact = exact,
+ strippath = strippath,
+ stripquery = stripquery,
+ temporary = temporary
+ }
end
},
{ '^%s*([^%s]+)%s+([^%s]+)%s*(.*)$',
@@ -369,12 +437,19 @@ local function load_redirect_file (f, filename)
dst = dst:sub(2)
end
- local optab = domopt
+ local optab = domopt
if optlist ~= '' then
optab = clone(domopt)
parseopt(optlist, optab, filename .. ':' .. ln)
end
+ if optab['urlencode'] then
+ src = url_path_encode(src)
+ dst = url_path_encode(dst)
+ else
+ src = src:gsub('%%%x%x', string.upper)
+ end
+
local dpath, dquery = dst:match('^(.+)?(.*)')
if dpath then
if optab['strippath'] == false and optab['strippath'] ~= domopt['strippath'] then
diff --git a/t/testsuite b/t/testsuite
index c4d3333..2a62e8d 100755
--- a/t/testsuite
+++ b/t/testsuite
@@ -406,6 +406,27 @@ runtest \
URL='example.org/third?query=true' \
HTTP_LOCATION='/com?pp=false&ok=1'
+# ####################
+group_header URL Encoding
+
+runtest \
+ RTFILE=f.tab \
+ DESCR='path only' \
+ URL='example.org/l%c3%b8rdag' \
+ HTTP_LOCATION=/saturday
+
+runtest \
+ RTFILE=f.tab \
+ DESCR='with query' \
+ URL='example.org/l%c3%b8rdag?q=1' \
+ HTTP_LOCATION='/satq?q=1'
+
+runtest \
+ RTFILE=f.tab \
+ DESCR='nourlencode' \
+ URL='example.org/s%c3%b8ndag' \
+ HTTP_LOCATION='/sunday'
+
echo
echo "Run $(($runtest_success_count + $runtest_failure_count)) tests."
if [ $runtest_failure_count -eq 0 ]; then

Return to:

Send suggestions and report system problems to the System administrator.