//
// Copyright (c) 2021 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
// Official repository: https://github.com/boostorg/url
//
9  

9  

10  
#ifndef BOOST_URL_GRAMMAR_LUT_CHARS_HPP
10  
#ifndef BOOST_URL_GRAMMAR_LUT_CHARS_HPP
11  
#define BOOST_URL_GRAMMAR_LUT_CHARS_HPP
11  
#define BOOST_URL_GRAMMAR_LUT_CHARS_HPP
12  

12  

13  
#include <boost/url/detail/config.hpp>
13  
#include <boost/url/detail/config.hpp>
14  
#include <boost/url/grammar/detail/charset.hpp>
14  
#include <boost/url/grammar/detail/charset.hpp>
15  
#include <cstdint>
15  
#include <cstdint>
16  
#include <type_traits>
16  
#include <type_traits>
17  

17  

// Credit to Peter Dimov for ideas regarding
// SIMD constexpr, and character set masks.
20  

20  

21  
namespace boost {
21  
namespace boost {
22  
namespace urls {
22  
namespace urls {
23  
namespace grammar {
23  
namespace grammar {
24  

24  

25  
#ifndef BOOST_URL_DOCS
25  
#ifndef BOOST_URL_DOCS
26  
namespace detail {
26  
namespace detail {
27  
template<class T, class = void>
27  
template<class T, class = void>
28  
struct is_pred : std::false_type {};
28  
struct is_pred : std::false_type {};
29  

29  

30  
template<class T>
30  
template<class T>
31  
struct is_pred<T, void_t<
31  
struct is_pred<T, void_t<
32  
    decltype(
32  
    decltype(
33  
    std::declval<bool&>() =
33  
    std::declval<bool&>() =
34  
        std::declval<T const&>().operator()(
34  
        std::declval<T const&>().operator()(
35  
            std::declval<char>())
35  
            std::declval<char>())
36  
            ) > > : std::true_type
36  
            ) > > : std::true_type
37  
{
37  
{
38  
};
38  
};
39  
} // detail
39  
} // detail
40  
#endif
40  
#endif
41  

41  

42  
/** A set of characters
42  
/** A set of characters
43  

43  

44  
    The characters defined by instances of
44  
    The characters defined by instances of
45  
    this set are provided upon construction.
45  
    this set are provided upon construction.
46  
    The `constexpr` implementation allows
46  
    The `constexpr` implementation allows
47  
    these to become compile-time constants.
47  
    these to become compile-time constants.
48  

48  

49  
    @par Example
49  
    @par Example
50  
    Character sets are used with rules and the
50  
    Character sets are used with rules and the
51  
    functions @ref find_if and @ref find_if_not.
51  
    functions @ref find_if and @ref find_if_not.
52  
    @code
52  
    @code
53  
    constexpr lut_chars vowel_chars = "AEIOU" "aeiou";
53  
    constexpr lut_chars vowel_chars = "AEIOU" "aeiou";
54  

54  

55  
    system::result< core::string_view > rv = parse( "Aiea", token_rule( vowel_chars ) );
55  
    system::result< core::string_view > rv = parse( "Aiea", token_rule( vowel_chars ) );
56  
    @endcode
56  
    @endcode
57  

57  

58  
    @see
58  
    @see
59  
        @ref find_if,
59  
        @ref find_if,
60  
        @ref find_if_not,
60  
        @ref find_if_not,
61  
        @ref parse,
61  
        @ref parse,
62  
        @ref token_rule.
62  
        @ref token_rule.
63  
*/
63  
*/
64  
class lut_chars
64  
class lut_chars
65  
{
65  
{
66  
    std::uint64_t mask_[4] = {};
66  
    std::uint64_t mask_[4] = {};
67  

67  

68  
    constexpr
68  
    constexpr
69  
    static
69  
    static
70  
    std::uint64_t
70  
    std::uint64_t
71  
    lo(char c) noexcept
71  
    lo(char c) noexcept
72  
    {
72  
    {
73  
        return static_cast<
73  
        return static_cast<
74  
            unsigned char>(c) & 3;
74  
            unsigned char>(c) & 3;
75  
    }
75  
    }
76  

76  

77  
    constexpr
77  
    constexpr
78  
    static
78  
    static
79  
    std::uint64_t
79  
    std::uint64_t
80  
    hi(char c) noexcept
80  
    hi(char c) noexcept
81  
    {
81  
    {
82  
        return 1ULL << (static_cast<
82  
        return 1ULL << (static_cast<
83  
            unsigned char>(c) >> 2);
83  
            unsigned char>(c) >> 2);
84  
    }
84  
    }
85  

85  

86  
    constexpr
86  
    constexpr
87  
    static
87  
    static
88  
    lut_chars
88  
    lut_chars
89  
    construct(
89  
    construct(
90  
        char const* s) noexcept
90  
        char const* s) noexcept
91  
    {
91  
    {
92  
        return *s
92  
        return *s
93  
            ? lut_chars(*s) +
93  
            ? lut_chars(*s) +
94  
                construct(s+1)
94  
                construct(s+1)
95  
            : lut_chars();
95  
            : lut_chars();
96  
    }
96  
    }
97  

97  

98  
    constexpr
98  
    constexpr
99  
    static
99  
    static
100  
    lut_chars
100  
    lut_chars
101  
    construct(
101  
    construct(
102  
        unsigned char ch,
102  
        unsigned char ch,
103  
        bool b) noexcept
103  
        bool b) noexcept
104  
    {
104  
    {
105  
        return b
105  
        return b
106  
            ? lut_chars(ch)
106  
            ? lut_chars(ch)
107  
            : lut_chars();
107  
            : lut_chars();
108  
    }
108  
    }
109  

109  

110  
    template<class Pred>
110  
    template<class Pred>
111  
    constexpr
111  
    constexpr
112  
    static
112  
    static
113  
    lut_chars
113  
    lut_chars
114  
    construct(
114  
    construct(
115  
        Pred pred,
115  
        Pred pred,
116  
        unsigned char ch) noexcept
116  
        unsigned char ch) noexcept
117  
    {
117  
    {
118  
        return ch == 255
118  
        return ch == 255
119  
            ? construct(ch, pred(static_cast<char>(ch)))
119  
            ? construct(ch, pred(static_cast<char>(ch)))
120  
            : construct(ch, pred(static_cast<char>(ch))) +
120  
            : construct(ch, pred(static_cast<char>(ch))) +
121  
                construct(pred, ch + 1);
121  
                construct(pred, ch + 1);
122  
    }
122  
    }
123  

123  

124  
    constexpr
124  
    constexpr
125  
    lut_chars() = default;
125  
    lut_chars() = default;
126  

126  

127  
    constexpr
127  
    constexpr
128  
    lut_chars(
128  
    lut_chars(
129  
        std::uint64_t m0,
129  
        std::uint64_t m0,
130  
        std::uint64_t m1,
130  
        std::uint64_t m1,
131  
        std::uint64_t m2,
131  
        std::uint64_t m2,
132  
        std::uint64_t m3) noexcept
132  
        std::uint64_t m3) noexcept
133  
        : mask_{ m0, m1, m2, m3 }
133  
        : mask_{ m0, m1, m2, m3 }
134  
    {
134  
    {
135  
    }
135  
    }
136  

136  

137  
public:
137  
public:
138  
    /** Constructor
138  
    /** Constructor
139  

139  

140  
        This function constructs a character
140  
        This function constructs a character
141  
        set which has as a single member,
141  
        set which has as a single member,
142  
        the character `ch`.
142  
        the character `ch`.
143  

143  

144  
        @par Example
144  
        @par Example
145  
        @code
145  
        @code
146  
        constexpr lut_chars asterisk( '*' );
146  
        constexpr lut_chars asterisk( '*' );
147  
        @endcode
147  
        @endcode
148  

148  

149  
        @par Complexity
149  
        @par Complexity
150  
        Constant.
150  
        Constant.
151  

151  

152  
        @par Exception Safety
152  
        @par Exception Safety
153  
        Throws nothing.
153  
        Throws nothing.
154  

154  

155  
        @param ch A character.
155  
        @param ch A character.
156  
    */
156  
    */
157  
    constexpr
157  
    constexpr
158  
    lut_chars(char ch) noexcept
158  
    lut_chars(char ch) noexcept
159  
        : mask_ {
159  
        : mask_ {
160  
            lo(ch) == 0 ? hi(ch) : 0,
160  
            lo(ch) == 0 ? hi(ch) : 0,
161  
            lo(ch) == 1 ? hi(ch) : 0,
161  
            lo(ch) == 1 ? hi(ch) : 0,
162  
            lo(ch) == 2 ? hi(ch) : 0,
162  
            lo(ch) == 2 ? hi(ch) : 0,
163  
            lo(ch) == 3 ? hi(ch) : 0 }
163  
            lo(ch) == 3 ? hi(ch) : 0 }
164  
    {
164  
    {
165  
    }
165  
    }
166  

166  

167  
    /** Constructor
167  
    /** Constructor
168  

168  

169  
        This function constructs a character
169  
        This function constructs a character
170  
        set which has as members, all of the
170  
        set which has as members, all of the
171  
        characters present in the null-terminated
171  
        characters present in the null-terminated
172  
        string `s`.
172  
        string `s`.
173  

173  

174  
        @par Example
174  
        @par Example
175  
        @code
175  
        @code
176  
        constexpr lut_chars digits = "0123456789";
176  
        constexpr lut_chars digits = "0123456789";
177  
        @endcode
177  
        @endcode
178  

178  

179  
        @par Complexity
179  
        @par Complexity
180  
        Linear in `::strlen(s)`, or constant
180  
        Linear in `::strlen(s)`, or constant
181  
        if `s` is a constant expression.
181  
        if `s` is a constant expression.
182  

182  

183  
        @par Exception Safety
183  
        @par Exception Safety
184  
        Throws nothing.
184  
        Throws nothing.
185  

185  

186  
        @param s A null-terminated string.
186  
        @param s A null-terminated string.
187  
    */
187  
    */
188  
    constexpr
188  
    constexpr
189  
    lut_chars(
189  
    lut_chars(
190  
        char const* s) noexcept
190  
        char const* s) noexcept
191  
        : lut_chars(construct(s))
191  
        : lut_chars(construct(s))
192  
    {
192  
    {
193  
    }
193  
    }
194  

194  

195  
    /** Constructor.
195  
    /** Constructor.
196  

196  

197  
        This function constructs a character
197  
        This function constructs a character
198  
        set which has as members, every value
198  
        set which has as members, every value
199  
        of `char ch` for which the expression
199  
        of `char ch` for which the expression
200  
        `pred(ch)` returns `true`.
200  
        `pred(ch)` returns `true`.
201  

201  

202  
        @par Example
202  
        @par Example
203  
        @code
203  
        @code
204  
        struct is_digit
204  
        struct is_digit
205  
        {
205  
        {
206  
            constexpr bool
206  
            constexpr bool
207  
            operator()(char c ) const noexcept
207  
            operator()(char c ) const noexcept
208  
            {
208  
            {
209  
                return c >= '0' && c <= '9';
209  
                return c >= '0' && c <= '9';
210  
            }
210  
            }
211  
        };
211  
        };
212  

212  

213  
        constexpr lut_chars digits( is_digit{} );
213  
        constexpr lut_chars digits( is_digit{} );
214  
        @endcode
214  
        @endcode
215  

215  

216  
        @par Complexity
216  
        @par Complexity
217  
        Linear in `pred`, or constant if
217  
        Linear in `pred`, or constant if
218  
        `pred(ch)` is a constant expression.
218  
        `pred(ch)` is a constant expression.
219  

219  

220  
        @par Exception Safety
220  
        @par Exception Safety
221  
        Throws nothing.
221  
        Throws nothing.
222  

222  

223  
        @param pred The function object to
223  
        @param pred The function object to
224  
        use for determining membership in
224  
        use for determining membership in
225  
        the character set.
225  
        the character set.
226  
    */
226  
    */
227  
    template<class Pred
227  
    template<class Pred
228  
#ifndef BOOST_URL_DOCS
228  
#ifndef BOOST_URL_DOCS
229  
        ,class = typename std::enable_if<
229  
        ,class = typename std::enable_if<
230  
            detail::is_pred<Pred>::value &&
230  
            detail::is_pred<Pred>::value &&
231  
        ! std::is_base_of<
231  
        ! std::is_base_of<
232  
            lut_chars, Pred>::value>::type
232  
            lut_chars, Pred>::value>::type
233  
#endif
233  
#endif
234  
    >
234  
    >
235  
    constexpr
235  
    constexpr
236  
    lut_chars(Pred const& pred) noexcept
236  
    lut_chars(Pred const& pred) noexcept
237  
        : lut_chars(
237  
        : lut_chars(
238  
            construct(pred, 0))
238  
            construct(pred, 0))
239  
    {
239  
    {
240  
    }
240  
    }
241  

241  

242  
    /** Return true if ch is in the character set.
242  
    /** Return true if ch is in the character set.
243  

243  

244  
        This function returns true if the
244  
        This function returns true if the
245  
        character `ch` is in the set, otherwise
245  
        character `ch` is in the set, otherwise
246  
        it returns false.
246  
        it returns false.
247  

247  

248  
        @par Complexity
248  
        @par Complexity
249  
        Constant.
249  
        Constant.
250  

250  

251  
        @par Exception Safety
251  
        @par Exception Safety
252  
        Throws nothing.
252  
        Throws nothing.
253  

253  

254  
        @param ch The character to test.
254  
        @param ch The character to test.
255  
        @return `true` if `ch` is in the set.
255  
        @return `true` if `ch` is in the set.
256  
    */
256  
    */
257  
    constexpr
257  
    constexpr
258  
    bool
258  
    bool
259  
    operator()(
259  
    operator()(
260  
        unsigned char ch) const noexcept
260  
        unsigned char ch) const noexcept
261  
    {
261  
    {
262  
        return operator()(static_cast<char>(ch));
262  
        return operator()(static_cast<char>(ch));
263  
    }
263  
    }
264  

264  

265  
    /// @copydoc operator()(unsigned char) const
265  
    /// @copydoc operator()(unsigned char) const
266  
    constexpr
266  
    constexpr
267  
    bool
267  
    bool
268  
    operator()(char ch) const noexcept
268  
    operator()(char ch) const noexcept
269  
    {
269  
    {
270  
        return mask_[lo(ch)] & hi(ch);
270  
        return mask_[lo(ch)] & hi(ch);
271  
    }
271  
    }
272  

272  

273  
    /** Return the union of two character sets.
273  
    /** Return the union of two character sets.
274  

274  

275  
        This function returns a new character
275  
        This function returns a new character
276  
        set which contains all of the characters
276  
        set which contains all of the characters
277  
        in `cs0` as well as all of the characters
277  
        in `cs0` as well as all of the characters
278  
        in `cs`.
278  
        in `cs`.
279  

279  

280  
        @par Example
280  
        @par Example
281  
        This creates a character set which
281  
        This creates a character set which
282  
        includes all letters and numbers
282  
        includes all letters and numbers
283  
        @code
283  
        @code
284  
        constexpr lut_chars alpha_chars(
284  
        constexpr lut_chars alpha_chars(
285  
            "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
285  
            "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
286  
            "abcdefghijklmnopqrstuvwxyz");
286  
            "abcdefghijklmnopqrstuvwxyz");
287  

287  

288  
        constexpr lut_chars alnum_chars = alpha_chars + "0123456789";
288  
        constexpr lut_chars alnum_chars = alpha_chars + "0123456789";
289  
        @endcode
289  
        @endcode
290  

290  

291  
        @par Complexity
291  
        @par Complexity
292  
        Constant.
292  
        Constant.
293  

293  

294  
        @return The new character set.
294  
        @return The new character set.
295  

295  

296  
        @param cs0 A character to join
296  
        @param cs0 A character to join
297  
        
297  
        
298  
        @param cs1 A character to join
298  
        @param cs1 A character to join
299  
    */
299  
    */
300  
    friend
300  
    friend
301  
    constexpr
301  
    constexpr
302  
    lut_chars
302  
    lut_chars
303  
    operator+(
303  
    operator+(
304  
        lut_chars const& cs0,
304  
        lut_chars const& cs0,
305  
        lut_chars const& cs1) noexcept
305  
        lut_chars const& cs1) noexcept
306  
    {
306  
    {
307  
        return lut_chars(
307  
        return lut_chars(
308  
            cs0.mask_[0] | cs1.mask_[0],
308  
            cs0.mask_[0] | cs1.mask_[0],
309  
            cs0.mask_[1] | cs1.mask_[1],
309  
            cs0.mask_[1] | cs1.mask_[1],
310  
            cs0.mask_[2] | cs1.mask_[2],
310  
            cs0.mask_[2] | cs1.mask_[2],
311  
            cs0.mask_[3] | cs1.mask_[3]);
311  
            cs0.mask_[3] | cs1.mask_[3]);
312  
    }
312  
    }
313  

313  

314  
    /** Return a new character set by subtracting
314  
    /** Return a new character set by subtracting
315  

315  

316  
        This function returns a new character
316  
        This function returns a new character
317  
        set which is formed from all of the
317  
        set which is formed from all of the
318  
        characters in `cs0` which are not in `cs`.
318  
        characters in `cs0` which are not in `cs`.
319  

319  

320  
        @par Example
320  
        @par Example
321  
        This statement declares a character set
321  
        This statement declares a character set
322  
        containing all the lowercase letters
322  
        containing all the lowercase letters
323  
        which are not vowels:
323  
        which are not vowels:
324  
        @code
324  
        @code
325  
        constexpr lut_chars consonants = lut_chars("abcdefghijklmnopqrstuvwxyz") - "aeiou";
325  
        constexpr lut_chars consonants = lut_chars("abcdefghijklmnopqrstuvwxyz") - "aeiou";
326  
        @endcode
326  
        @endcode
327  

327  

328  
        @par Complexity
328  
        @par Complexity
329  
        Constant.
329  
        Constant.
330  

330  

331  
        @return The new character set.
331  
        @return The new character set.
332  

332  

333  
        @param cs0 A character set to join.
333  
        @param cs0 A character set to join.
334  
        
334  
        
335  
        @param cs1 A character set to join.
335  
        @param cs1 A character set to join.
336  
    */
336  
    */
337  
    friend
337  
    friend
338  
    constexpr
338  
    constexpr
339  
    lut_chars
339  
    lut_chars
340  
    operator-(
340  
    operator-(
341  
        lut_chars const& cs0,
341  
        lut_chars const& cs0,
342  
        lut_chars const& cs1) noexcept
342  
        lut_chars const& cs1) noexcept
343  
    {
343  
    {
344  
        return lut_chars(
344  
        return lut_chars(
345  
            cs0.mask_[0] & ~cs1.mask_[0],
345  
            cs0.mask_[0] & ~cs1.mask_[0],
346  
            cs0.mask_[1] & ~cs1.mask_[1],
346  
            cs0.mask_[1] & ~cs1.mask_[1],
347  
            cs0.mask_[2] & ~cs1.mask_[2],
347  
            cs0.mask_[2] & ~cs1.mask_[2],
348  
            cs0.mask_[3] & ~cs1.mask_[3]);
348  
            cs0.mask_[3] & ~cs1.mask_[3]);
349  
    }
349  
    }
350  

350  

351  
    /** Return a new character set which is the complement of another character set.
351  
    /** Return a new character set which is the complement of another character set.
352  

352  

353  
        This function returns a new character
353  
        This function returns a new character
354  
        set which contains all of the characters
354  
        set which contains all of the characters
355  
        that are not in `*this`.
355  
        that are not in `*this`.
356  

356  

357  
        @par Example
357  
        @par Example
358  
        This statement declares a character set
358  
        This statement declares a character set
359  
        containing everything but vowels:
359  
        containing everything but vowels:
360  
        @code
360  
        @code
361  
        constexpr lut_chars not_vowels = ~lut_chars( "AEIOU" "aeiou" );
361  
        constexpr lut_chars not_vowels = ~lut_chars( "AEIOU" "aeiou" );
362  
        @endcode
362  
        @endcode
363  

363  

364  
        @par Complexity
364  
        @par Complexity
365  
        Constant.
365  
        Constant.
366  

366  

367  
        @par Exception Safety
367  
        @par Exception Safety
368  
        Throws nothing.
368  
        Throws nothing.
369  

369  

370  
        @return The new character set.
370  
        @return The new character set.
371  
    */
371  
    */
372  
    constexpr
372  
    constexpr
373  
    lut_chars
373  
    lut_chars
374  
    operator~() const noexcept
374  
    operator~() const noexcept
375  
    {
375  
    {
376  
        return lut_chars(
376  
        return lut_chars(
377  
            ~mask_[0],
377  
            ~mask_[0],
378  
            ~mask_[1],
378  
            ~mask_[1],
379  
            ~mask_[2],
379  
            ~mask_[2],
380  
            ~mask_[3]
380  
            ~mask_[3]
381  
        );
381  
        );
382  
    }
382  
    }
383  

383  

384  
#ifndef BOOST_URL_DOCS
384  
#ifndef BOOST_URL_DOCS
385  
#ifdef BOOST_URL_USE_SSE2
385  
#ifdef BOOST_URL_USE_SSE2
386  
    char const*
386  
    char const*
387  
    find_if(
387  
    find_if(
388  
        char const* first,
388  
        char const* first,
389  
        char const* last) const noexcept
389  
        char const* last) const noexcept
390  
    {
390  
    {
391  
        return detail::find_if_pred(
391  
        return detail::find_if_pred(
392  
            *this, first, last);
392  
            *this, first, last);
393  
    }
393  
    }
394  

394  

395  
    char const*
395  
    char const*
396  
    find_if_not(
396  
    find_if_not(
397  
        char const* first,
397  
        char const* first,
398  
        char const* last) const noexcept
398  
        char const* last) const noexcept
399  
    {
399  
    {
400  
        return detail::find_if_not_pred(
400  
        return detail::find_if_not_pred(
401  
            *this, first, last);
401  
            *this, first, last);
402  
    }
402  
    }
403  
#endif
403  
#endif
404  
#endif
404  
#endif
405  
};
405  
};
406  

406  

407  
} // grammar
407  
} // grammar
408  
} // urls
408  
} // urls
409  
} // boost
409  
} // boost
410  

410  

411  
#endif
411  
#endif