//
// Copyright (c) 2019 Vinnie Falco (vinnie.falco@gmail.com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
// Official repository: https://github.com/boostorg/url
//
9  

9  

10  
#ifndef BOOST_URL_IMPL_ENCODE_HPP
10  
#ifndef BOOST_URL_IMPL_ENCODE_HPP
11  
#define BOOST_URL_IMPL_ENCODE_HPP
11  
#define BOOST_URL_IMPL_ENCODE_HPP
12  

12  

13  
#include "boost/url/grammar/token_rule.hpp"
13  
#include "boost/url/grammar/token_rule.hpp"
14  
#include <boost/assert.hpp>
14  
#include <boost/assert.hpp>
15  
#include <boost/core/detail/static_assert.hpp>
15  
#include <boost/core/detail/static_assert.hpp>
16  
#include <boost/url/detail/encode.hpp>
16  
#include <boost/url/detail/encode.hpp>
17  
#include <boost/url/detail/except.hpp>
17  
#include <boost/url/detail/except.hpp>
18  
#include <boost/url/encoding_opts.hpp>
18  
#include <boost/url/encoding_opts.hpp>
19  
#include <boost/url/grammar/charset.hpp>
19  
#include <boost/url/grammar/charset.hpp>
20  
#include <boost/url/grammar/hexdig_chars.hpp>
20  
#include <boost/url/grammar/hexdig_chars.hpp>
21  
#include <boost/url/grammar/string_token.hpp>
21  
#include <boost/url/grammar/string_token.hpp>
22  
#include <boost/url/grammar/type_traits.hpp>
22  
#include <boost/url/grammar/type_traits.hpp>
23  

23  

24  
namespace boost {
24  
namespace boost {
25  
namespace urls {
25  
namespace urls {
26  

26  

27  
//------------------------------------------------
27  
//------------------------------------------------
28  

28  

29  
template<BOOST_URL_CONSTRAINT(grammar::CharSet) CS>
29  
template<BOOST_URL_CONSTRAINT(grammar::CharSet) CS>
30  
std::size_t
30  
std::size_t
31  
encoded_size(
31  
encoded_size(
32  
    core::string_view s,
32  
    core::string_view s,
33  
    CS const& allowed,
33  
    CS const& allowed,
34  
    encoding_opts opt) noexcept
34  
    encoding_opts opt) noexcept
35  
{
35  
{
36  
    /*
36  
    /*
37  
        If you get a compilation error here, it
37  
        If you get a compilation error here, it
38  
        means that the value you passed does
38  
        means that the value you passed does
39  
        not meet the requirements stated in
39  
        not meet the requirements stated in
40  
        the documentation.
40  
        the documentation.
41  
    */
41  
    */
42  
    BOOST_CORE_STATIC_ASSERT(
42  
    BOOST_CORE_STATIC_ASSERT(
43  
        grammar::is_charset<CS>::value);
43  
        grammar::is_charset<CS>::value);
44  

44  

45  
    std::size_t n = 0;
45  
    std::size_t n = 0;
46  
    auto it = s.data();
46  
    auto it = s.data();
47  
    auto const last = it + s.size();
47  
    auto const last = it + s.size();
48  

48  

49  
    if (!opt.space_as_plus)
49  
    if (!opt.space_as_plus)
50  
    {
50  
    {
51  
        while (it != last)
51  
        while (it != last)
52  
        {
52  
        {
53  
            char const c = *it;
53  
            char const c = *it;
54  
            if (allowed(c))
54  
            if (allowed(c))
55  
            {
55  
            {
56  
                ++n;
56  
                ++n;
57  
            }
57  
            }
58  
            else
58  
            else
59  
            {
59  
            {
60  
                n += 3;
60  
                n += 3;
61  
            }
61  
            }
62  
            ++it;
62  
            ++it;
63  
        }
63  
        }
64  
    }
64  
    }
65  
    else
65  
    else
66  
    {
66  
    {
67  
        // '+' is always encoded (thus
67  
        // '+' is always encoded (thus
68  
        // spending 3 chars) even if
68  
        // spending 3 chars) even if
69  
        // allowed because "%2B" and
69  
        // allowed because "%2B" and
70  
        // "+" have different meanings
70  
        // "+" have different meanings
71  
        // when space as plus is enabled
71  
        // when space as plus is enabled
72  
        using FNT = bool (*)(CS const& allowed, char);
72  
        using FNT = bool (*)(CS const& allowed, char);
73  
        FNT takes_one_char =
73  
        FNT takes_one_char =
74  
            allowed('+') ?
74  
            allowed('+') ?
75  
                (allowed(' ') ?
75  
                (allowed(' ') ?
76  
                     FNT([](CS const& allowed, char c){ return allowed(c) && c != '+'; }) :
76  
                     FNT([](CS const& allowed, char c){ return allowed(c) && c != '+'; }) :
77  
                     FNT([](CS const& allowed, char c){ return (allowed(c) || c == ' ') && c != '+'; })) :
77  
                     FNT([](CS const& allowed, char c){ return (allowed(c) || c == ' ') && c != '+'; })) :
78  
                (allowed(' ') ?
78  
                (allowed(' ') ?
79  
                     FNT([](CS const& allowed, char c){ return allowed(c); }) :
79  
                     FNT([](CS const& allowed, char c){ return allowed(c); }) :
80  
                     FNT([](CS const& allowed, char c){ return allowed(c) || c == ' '; }));
80  
                     FNT([](CS const& allowed, char c){ return allowed(c) || c == ' '; }));
81  
        while (it != last)
81  
        while (it != last)
82  
        {
82  
        {
83  
            char const c = *it;
83  
            char const c = *it;
84  
            if (takes_one_char(allowed, c))
84  
            if (takes_one_char(allowed, c))
85  
            {
85  
            {
86  
                ++n;
86  
                ++n;
87  
            }
87  
            }
88  
            else
88  
            else
89  
            {
89  
            {
90  
                n += 3;
90  
                n += 3;
91  
            }
91  
            }
92  
            ++it;
92  
            ++it;
93  
        }
93  
        }
94  
    }
94  
    }
95  
    return n;
95  
    return n;
96  
}
96  
}
97  

97  

98  
//------------------------------------------------
98  
//------------------------------------------------
99  

99  

100  
template<BOOST_URL_CONSTRAINT(grammar::CharSet) CS>
100  
template<BOOST_URL_CONSTRAINT(grammar::CharSet) CS>
101  
std::size_t
101  
std::size_t
102  
encode(
102  
encode(
103  
    char* dest,
103  
    char* dest,
104  
    std::size_t size,
104  
    std::size_t size,
105  
    core::string_view s,
105  
    core::string_view s,
106  
    CS const& allowed,
106  
    CS const& allowed,
107  
    encoding_opts opt)
107  
    encoding_opts opt)
108  
{
108  
{
109  
/*  If you get a compilation error here, it
109  
/*  If you get a compilation error here, it
110  
    means that the value you passed does
110  
    means that the value you passed does
111  
    not meet the requirements stated in
111  
    not meet the requirements stated in
112  
    the documentation.
112  
    the documentation.
113  
*/
113  
*/
114  
    BOOST_CORE_STATIC_ASSERT(
114  
    BOOST_CORE_STATIC_ASSERT(
115  
        grammar::is_charset<CS>::value);
115  
        grammar::is_charset<CS>::value);
116  

116  

117  
    // '%' must be reserved
117  
    // '%' must be reserved
118  
    BOOST_ASSERT(!allowed('%'));
118  
    BOOST_ASSERT(!allowed('%'));
119  

119  

120  
    char const* const hex =
120  
    char const* const hex =
121  
        detail::hexdigs[opt.lower_case];
121  
        detail::hexdigs[opt.lower_case];
122  
    auto const encode = [hex](
122  
    auto const encode = [hex](
123  
        char*& dest,
123  
        char*& dest,
124  
        unsigned char c) noexcept
124  
        unsigned char c) noexcept
125  
    {
125  
    {
126  
        *dest++ = '%';
126  
        *dest++ = '%';
127  
        *dest++ = hex[c>>4];
127  
        *dest++ = hex[c>>4];
128  
        *dest++ = hex[c&0xf];
128  
        *dest++ = hex[c&0xf];
129  
    };
129  
    };
130  

130  

131  
    auto it = s.data();
131  
    auto it = s.data();
132  
    auto const end = dest + size;
132  
    auto const end = dest + size;
133  
    auto const last = it + s.size();
133  
    auto const last = it + s.size();
134  
    auto const dest0 = dest;
134  
    auto const dest0 = dest;
135  
    auto const end3 = end - 3;
135  
    auto const end3 = end - 3;
136  

136  

137  
    if (!opt.space_as_plus)
137  
    if (!opt.space_as_plus)
138  
    {
138  
    {
139  
        while(it != last)
139  
        while(it != last)
140  
        {
140  
        {
141  
            char const c = *it;
141  
            char const c = *it;
142  
            if (allowed(c))
142  
            if (allowed(c))
143  
            {
143  
            {
144  
                if(dest == end)
144  
                if(dest == end)
145  
                    return dest - dest0;
145  
                    return dest - dest0;
146  
                *dest++ = c;
146  
                *dest++ = c;
147  
                ++it;
147  
                ++it;
148  
                continue;
148  
                continue;
149  
            }
149  
            }
150  
            if (dest > end3)
150  
            if (dest > end3)
151  
                return dest - dest0;
151  
                return dest - dest0;
152  
            encode(dest, c);
152  
            encode(dest, c);
153  
            ++it;
153  
            ++it;
154  
        }
154  
        }
155  
        return dest - dest0;
155  
        return dest - dest0;
156  
    }
156  
    }
157  
    else
157  
    else
158  
    {
158  
    {
159  
        while (it != last)
159  
        while (it != last)
160  
        {
160  
        {
161  
            char const c = *it;
161  
            char const c = *it;
162  
            if (c == ' ')
162  
            if (c == ' ')
163  
            {
163  
            {
164  
                if(dest == end)
164  
                if(dest == end)
165  
                    return dest - dest0;
165  
                    return dest - dest0;
166  
                *dest++ = '+';
166  
                *dest++ = '+';
167  
                ++it;
167  
                ++it;
168  
                continue;
168  
                continue;
169  
            }
169  
            }
170  
            else if (
170  
            else if (
171  
                allowed(c) &&
171  
                allowed(c) &&
172  
                c != '+')
172  
                c != '+')
173  
            {
173  
            {
174  
                if(dest == end)
174  
                if(dest == end)
175  
                    return dest - dest0;
175  
                    return dest - dest0;
176  
                *dest++ = c;
176  
                *dest++ = c;
177  
                ++it;
177  
                ++it;
178  
                continue;
178  
                continue;
179  
            }
179  
            }
180  
            if(dest > end3)
180  
            if(dest > end3)
181  
                return dest - dest0;
181  
                return dest - dest0;
182  
            encode(dest, c);
182  
            encode(dest, c);
183  
            ++it;
183  
            ++it;
184  
        }
184  
        }
185  
    }
185  
    }
186  
    return dest - dest0;
186  
    return dest - dest0;
187  
}
187  
}
188  

188  

189  
//------------------------------------------------
189  
//------------------------------------------------
190  

190  

191  
// unsafe encode just
191  
// unsafe encode just
192  
// asserts on the output buffer
192  
// asserts on the output buffer
193  
//
193  
//
194  
template<BOOST_URL_CONSTRAINT(grammar::CharSet) CS>
194  
template<BOOST_URL_CONSTRAINT(grammar::CharSet) CS>
195  
std::size_t
195  
std::size_t
196  
encode_unsafe(
196  
encode_unsafe(
197  
    char* dest,
197  
    char* dest,
198  
    std::size_t size,
198  
    std::size_t size,
199  
    core::string_view s,
199  
    core::string_view s,
200  
    CS const& allowed,
200  
    CS const& allowed,
201  
    encoding_opts opt)
201  
    encoding_opts opt)
202  
{
202  
{
203  
    BOOST_CORE_STATIC_ASSERT(
203  
    BOOST_CORE_STATIC_ASSERT(
204  
        grammar::is_charset<CS>::value);
204  
        grammar::is_charset<CS>::value);
205  

205  

206  
    // '%' must be reserved
206  
    // '%' must be reserved
207  
    BOOST_ASSERT(!allowed('%'));
207  
    BOOST_ASSERT(!allowed('%'));
208  

208  

209  
    auto it = s.data();
209  
    auto it = s.data();
210  
    auto const last = it + s.size();
210  
    auto const last = it + s.size();
211  
    auto const end = dest + size;
211  
    auto const end = dest + size;
212  
    ignore_unused(end);
212  
    ignore_unused(end);
213  

213  

214  
    char const* const hex =
214  
    char const* const hex =
215  
        detail::hexdigs[opt.lower_case];
215  
        detail::hexdigs[opt.lower_case];
216  
    auto const encode = [end, hex](
216  
    auto const encode = [end, hex](
217  
        char*& dest,
217  
        char*& dest,
218  
        unsigned char c) noexcept
218  
        unsigned char c) noexcept
219  
    {
219  
    {
220  
        ignore_unused(end);
220  
        ignore_unused(end);
221  
        *dest++ = '%';
221  
        *dest++ = '%';
222  
        BOOST_ASSERT(dest != end);
222  
        BOOST_ASSERT(dest != end);
223  
        *dest++ = hex[c>>4];
223  
        *dest++ = hex[c>>4];
224  
        BOOST_ASSERT(dest != end);
224  
        BOOST_ASSERT(dest != end);
225  
        *dest++ = hex[c&0xf];
225  
        *dest++ = hex[c&0xf];
226  
    };
226  
    };
227  

227  

228  
    auto const dest0 = dest;
228  
    auto const dest0 = dest;
229  
    if (!opt.space_as_plus)
229  
    if (!opt.space_as_plus)
230  
    {
230  
    {
231  
        while(it != last)
231  
        while(it != last)
232  
        {
232  
        {
233  
            BOOST_ASSERT(dest != end);
233  
            BOOST_ASSERT(dest != end);
234  
            char const c = *it;
234  
            char const c = *it;
235  
            if(allowed(c))
235  
            if(allowed(c))
236  
            {
236  
            {
237  
                *dest++ = c;
237  
                *dest++ = c;
238  
            }
238  
            }
239  
            else
239  
            else
240  
            {
240  
            {
241  
                encode(dest, c);
241  
                encode(dest, c);
242  
            }
242  
            }
243  
            ++it;
243  
            ++it;
244  
        }
244  
        }
245  
    }
245  
    }
246  
    else
246  
    else
247  
    {
247  
    {
248  
        while(it != last)
248  
        while(it != last)
249  
        {
249  
        {
250  
            BOOST_ASSERT(dest != end);
250  
            BOOST_ASSERT(dest != end);
251  
            char const c = *it;
251  
            char const c = *it;
252  
            if (c == ' ')
252  
            if (c == ' ')
253  
            {
253  
            {
254  
                *dest++ = '+';
254  
                *dest++ = '+';
255  
            }
255  
            }
256  
            else if (
256  
            else if (
257  
                allowed(c) &&
257  
                allowed(c) &&
258  
                c != '+')
258  
                c != '+')
259  
            {
259  
            {
260  
                *dest++ = c;
260  
                *dest++ = c;
261  
            }
261  
            }
262  
            else
262  
            else
263  
            {
263  
            {
264  
                encode(dest, c);
264  
                encode(dest, c);
265  
            }
265  
            }
266  
            ++it;
266  
            ++it;
267  
        }
267  
        }
268  
    }
268  
    }
269  
    return dest - dest0;
269  
    return dest - dest0;
270  
}
270  
}
271  

271  

272  
//------------------------------------------------
272  
//------------------------------------------------
273  

273  

274  
template<
274  
template<
275  
    BOOST_URL_CONSTRAINT(string_token::StringToken) StringToken,
275  
    BOOST_URL_CONSTRAINT(string_token::StringToken) StringToken,
276  
    BOOST_URL_CONSTRAINT(grammar::CharSet) CS>
276  
    BOOST_URL_CONSTRAINT(grammar::CharSet) CS>
277  
BOOST_URL_STRTOK_RETURN
277  
BOOST_URL_STRTOK_RETURN
278  
encode(
278  
encode(
279  
    core::string_view s,
279  
    core::string_view s,
280  
    CS const& allowed,
280  
    CS const& allowed,
281  
    encoding_opts opt,
281  
    encoding_opts opt,
282  
    StringToken&& token) noexcept
282  
    StringToken&& token) noexcept
283  
{
283  
{
284  
    BOOST_CORE_STATIC_ASSERT(
284  
    BOOST_CORE_STATIC_ASSERT(
285  
        grammar::is_charset<CS>::value);
285  
        grammar::is_charset<CS>::value);
286  

286  

287  
    auto const n = encoded_size(
287  
    auto const n = encoded_size(
288  
        s, allowed, opt);
288  
        s, allowed, opt);
289  
    auto p = token.prepare(n);
289  
    auto p = token.prepare(n);
290  
    if(n > 0)
290  
    if(n > 0)
291  
        encode_unsafe(
291  
        encode_unsafe(
292  
            p, n, s, allowed, opt);
292  
            p, n, s, allowed, opt);
293  
    return token.result();
293  
    return token.result();
294  
}
294  
}
295  

295  

296  
} // urls
296  
} // urls
297  
} // boost
297  
} // boost
298  

298  

299  
#endif
299  
#endif