1 // Boost string_algo library regex.hpp header file ---------------------------//
3 // Copyright Pavol Droba 2002-2003.
5 // Distributed under the Boost Software License, Version 1.0.
6 // (See accompanying file LICENSE_1_0.txt or copy at
7 // http://www.boost.org/LICENSE_1_0.txt)
9 // See http://www.boost.org/ for updates, documentation, and revision history.
11 #ifndef BOOST_STRING_REGEX_HPP
12 #define BOOST_STRING_REGEX_HPP
14 #include <boost/algorithm/string/config.hpp>
15 #include <boost/regex.hpp>
17 #include <boost/range/iterator_range.hpp>
18 #include <boost/range/begin.hpp>
19 #include <boost/range/end.hpp>
20 #include <boost/range/iterator.hpp>
21 #include <boost/range/as_literal.hpp>
23 #include <boost/algorithm/string/find_format.hpp>
24 #include <boost/algorithm/string/regex_find_format.hpp>
25 #include <boost/algorithm/string/formatter.hpp>
26 #include <boost/algorithm/string/iter_find.hpp>
29 Defines regex variants of the algorithms.
35 // find_regex -----------------------------------------------//
37 //! Find regex algorithm
39 Search for a substring matching the given regex in the input.
41 \param Input A container which will be searched.
42 \param Rx A regular expression
43 \param Flags Regex options
45 An \c iterator_range delimiting the match.
46 Returned iterator is either \c RangeT::iterator or
47 \c RangeT::const_iterator, depending on the constness of the input.
50 \note This function provides the strong exception-safety guarantee
// find_regex (name taken from the section banner above and from the
// using-declarations at the end of the file).
// NOTE(review): the `template<` opener, the function-name line, the `Input`
// parameter line and the braces are not visible in this listing.
// The declared return type is an iterator_range over
// range_iterator<RangeT>::type.
55 typename RegexTraitsT>
56 inline iterator_range<
57 BOOST_STRING_TYPENAME range_iterator<RangeT>::type >
// Rx is the expression to search for; Flags defaults to match_default.
60 const basic_regex<CharT, RegexTraitsT>& Rx,
61 match_flag_type Flags=match_default )
// Pass Input through as_literal and keep the result as an iterator_range, so
// the begin/end calls below operate on that range rather than on Input itself.
63 iterator_range<BOOST_STRING_TYPENAME range_iterator<RangeT>::type> lit_input(::boost::as_literal(Input));
// Build a finder from Rx and Flags and apply it to [begin, end) of lit_input;
// whatever the finder yields is returned unchanged.
65 return ::boost::algorithm::regex_finder(Rx,Flags)(
66 ::boost::begin(lit_input), ::boost::end(lit_input) );
69 // replace_regex --------------------------------------------------------------------//
71 //! Replace regex algorithm
73 Search for a substring matching the given regex and format it with the specified format.
75 The result is a modified copy of the input. It is returned as a sequence
76 or copied to the output iterator.
78 \param Output An output iterator to which the result will be copied
79 \param Input An input string
80 \param Rx A regular expression
81 \param Format Regex format definition
82 \param Flags Regex options
83 \return An output iterator pointing just after the last inserted character or
84 a modified copy of the input
86 \note The second variant of this function provides the strong exception-safety guarantee
// replace_regex_copy, output-iterator overload: takes the destination iterator
// by value and returns an OutputIteratorT.
// NOTE(review): the `template<` opener, some template/function parameters, the
// first arguments of the find_format_copy call and the braces are not visible
// in this listing.
89 typename OutputIteratorT,
92 typename RegexTraitsT,
93 typename FormatStringTraitsT, typename FormatStringAllocatorT >
94 inline OutputIteratorT replace_regex_copy(
95 OutputIteratorT Output,
97 const basic_regex<CharT, RegexTraitsT>& Rx,
// Format is a std::basic_string over the same CharT as the regex, with its own
// traits and allocator template parameters.
98 const std::basic_string<CharT, FormatStringTraitsT, FormatStringAllocatorT>& Format,
// A single Flags value carries both match and format options
// (default: match_default | format_default).
99 match_flag_type Flags=match_default | format_default )
// Delegate to find_format_copy, pairing a regex-based finder with a
// regex-based formatter; the same Flags value is handed to both.
101 return ::boost::algorithm::find_format_copy(
104 ::boost::algorithm::regex_finder( Rx, Flags ),
105 ::boost::algorithm::regex_formatter( Format, Flags ) );
108 //! Replace regex algorithm
// replace_regex_copy, sequence overload: takes the input by const reference
// and returns a SequenceT by value.
// NOTE(review): the `template<` opener, leading template parameters, the first
// argument of the find_format_copy call and the braces are not visible in this
// listing.
115 typename RegexTraitsT,
116 typename FormatStringTraitsT, typename FormatStringAllocatorT >
117 inline SequenceT replace_regex_copy(
118 const SequenceT& Input,
119 const basic_regex<CharT, RegexTraitsT>& Rx,
120 const std::basic_string<CharT, FormatStringTraitsT, FormatStringAllocatorT>& Format,
// Flags combines match and format options; defaults to
// match_default | format_default.
121 match_flag_type Flags=match_default | format_default )
// The result of find_format_copy is returned directly. The finder is built
// from Rx/Flags and the formatter from Format/Flags.
123 return ::boost::algorithm::find_format_copy(
125 ::boost::algorithm::regex_finder( Rx, Flags ),
126 ::boost::algorithm::regex_formatter( Format, Flags ) );
129 //! Replace regex algorithm
131 Search for a substring matching the given regex and format it with
132 the specified format. The input string is modified in-place.
134 \param Input An input string
135 \param Rx A regular expression
136 \param Format Regex format definition
137 \param Flags Regex options
// replace_regex: in-place variant, returns void.
// NOTE(review): the `template<` opener, leading template parameters, the
// `Input` parameter line, the first argument of the find_format call and the
// braces are not visible in this listing.
142 typename RegexTraitsT,
143 typename FormatStringTraitsT, typename FormatStringAllocatorT >
144 inline void replace_regex(
146 const basic_regex<CharT, RegexTraitsT>& Rx,
147 const std::basic_string<CharT, FormatStringTraitsT, FormatStringAllocatorT>& Format,
// Flags combines match and format options; defaults to
// match_default | format_default.
148 match_flag_type Flags=match_default | format_default )
// Delegate to find_format (the non-copy counterpart of find_format_copy) with
// a finder built from Rx/Flags and a formatter built from Format/Flags.
150 ::boost::algorithm::find_format(
152 ::boost::algorithm::regex_finder( Rx, Flags ),
153 ::boost::algorithm::regex_formatter( Format, Flags ) );
156 // replace_all_regex --------------------------------------------------------------------//
158 //! Replace all regex algorithm
160 Format all substrings matching the given regex with the specified format.
161 The result is a modified copy of the input. It is returned as a sequence
162 or copied to the output iterator.
164 \param Output An output iterator to which the result will be copied
165 \param Input An input string
166 \param Rx A regular expression
167 \param Format Regex format definition
168 \param Flags Regex options
169 \return An output iterator pointing just after the last inserted character or
170 a modified copy of the input
172 \note The second variant of this function provides the strong exception-safety guarantee
// replace_all_regex_copy, output-iterator overload: takes the destination
// iterator by value and returns an OutputIteratorT.
// NOTE(review): the `template<` opener, some template/function parameters, the
// first arguments of the find_format_all_copy call and the braces are not
// visible in this listing.
175 typename OutputIteratorT,
178 typename RegexTraitsT,
179 typename FormatStringTraitsT, typename FormatStringAllocatorT >
180 inline OutputIteratorT replace_all_regex_copy(
181 OutputIteratorT Output,
183 const basic_regex<CharT, RegexTraitsT>& Rx,
184 const std::basic_string<CharT, FormatStringTraitsT, FormatStringAllocatorT>& Format,
// Flags combines match and format options; defaults to
// match_default | format_default.
185 match_flag_type Flags=match_default | format_default )
// Delegate to find_format_all_copy (the "all" counterpart of the call used by
// replace_regex_copy); finder and formatter both receive the same Flags.
187 return ::boost::algorithm::find_format_all_copy(
190 ::boost::algorithm::regex_finder( Rx, Flags ),
191 ::boost::algorithm::regex_formatter( Format, Flags ) );
194 //! Replace all regex algorithm
// replace_all_regex_copy, sequence overload: takes the input by const
// reference and returns a SequenceT by value.
// NOTE(review): the `template<` opener, leading template parameters, the first
// argument of the find_format_all_copy call and the braces are not visible in
// this listing.
201 typename RegexTraitsT,
202 typename FormatStringTraitsT, typename FormatStringAllocatorT >
203 inline SequenceT replace_all_regex_copy(
204 const SequenceT& Input,
205 const basic_regex<CharT, RegexTraitsT>& Rx,
206 const std::basic_string<CharT, FormatStringTraitsT, FormatStringAllocatorT>& Format,
// Flags combines match and format options; defaults to
// match_default | format_default.
207 match_flag_type Flags=match_default | format_default )
// The result of find_format_all_copy is returned directly. The finder is built
// from Rx/Flags and the formatter from Format/Flags.
209 return ::boost::algorithm::find_format_all_copy(
211 ::boost::algorithm::regex_finder( Rx, Flags ),
212 ::boost::algorithm::regex_formatter( Format, Flags ) );
215 //! Replace all regex algorithm
217 Format all substrings matching the given regex with the specified format.
218 The input string is modified in-place.
220 \param Input An input string
221 \param Rx A regular expression
222 \param Format Regex format definition
223 \param Flags Regex options
// replace_all_regex: in-place variant, returns void.
// NOTE(review): the `template<` opener, leading template parameters, the
// `Input` parameter line, the first argument of the find_format_all call and
// the braces are not visible in this listing.
228 typename RegexTraitsT,
229 typename FormatStringTraitsT, typename FormatStringAllocatorT >
230 inline void replace_all_regex(
232 const basic_regex<CharT, RegexTraitsT>& Rx,
233 const std::basic_string<CharT, FormatStringTraitsT, FormatStringAllocatorT>& Format,
// Flags combines match and format options; defaults to
// match_default | format_default.
234 match_flag_type Flags=match_default | format_default )
// Delegate to find_format_all with a finder built from Rx/Flags and a
// formatter built from Format/Flags.
236 ::boost::algorithm::find_format_all(
238 ::boost::algorithm::regex_finder( Rx, Flags ),
239 ::boost::algorithm::regex_formatter( Format, Flags ) );
242 // erase_regex --------------------------------------------------------------------//
244 //! Erase regex algorithm
246 Remove a substring matching the given regex from the input.
247 The result is a modified copy of the input. It is returned as a sequence
248 or copied to the output iterator.
250 \param Output An output iterator to which the result will be copied
251 \param Input An input string
252 \param Rx A regular expression
253 \param Flags Regex options
254 \return An output iterator pointing just after the last inserted character or
255 a modified copy of the input
257 \note The second variant of this function provides the strong exception-safety guarantee
// erase_regex_copy, output-iterator overload: takes the destination iterator
// by value and returns an OutputIteratorT.
// NOTE(review): the `template<` opener, some template parameters, the `Input`
// parameter line, the first arguments of the find_format_copy call and the
// braces are not visible in this listing.
260 typename OutputIteratorT,
263 typename RegexTraitsT >
264 inline OutputIteratorT erase_regex_copy(
265 OutputIteratorT Output,
267 const basic_regex<CharT, RegexTraitsT>& Rx,
// Only match options are needed here; Flags defaults to match_default.
268 match_flag_type Flags=match_default )
// Same delegation as replace_regex_copy, but the formatter is an
// empty_formatter constructed from Input instead of a regex_formatter.
// Presumably the formatter yields an empty replacement, which is what makes
// this an erase -- confirm against formatter.hpp.
270 return ::boost::algorithm::find_format_copy(
273 ::boost::algorithm::regex_finder( Rx, Flags ),
274 ::boost::algorithm::empty_formatter( Input ) );
277 //! Erase regex algorithm
// erase_regex_copy, sequence overload: takes the input by const reference and
// returns a SequenceT by value.
// NOTE(review): the `template<` opener, leading template parameters, the first
// argument of the find_format_copy call and the braces are not visible in this
// listing.
284 typename RegexTraitsT >
285 inline SequenceT erase_regex_copy(
286 const SequenceT& Input,
287 const basic_regex<CharT, RegexTraitsT>& Rx,
// Flags defaults to match_default.
288 match_flag_type Flags=match_default )
// The result of find_format_copy is returned directly. The finder is built
// from Rx/Flags; the formatter is an empty_formatter constructed from Input.
290 return ::boost::algorithm::find_format_copy(
292 ::boost::algorithm::regex_finder( Rx, Flags ),
293 ::boost::algorithm::empty_formatter( Input ) );
296 //! Erase regex algorithm
298 Remove a substring matching the given regex from the input.
299 The input string is modified in-place.
301 \param Input An input string
302 \param Rx A regular expression
303 \param Flags Regex options
// erase_regex: in-place variant, returns void.
// NOTE(review): the `template<` opener, leading template parameters, the
// `Input` parameter line, the first argument of the find_format call and the
// braces are not visible in this listing.
308 typename RegexTraitsT >
309 inline void erase_regex(
311 const basic_regex<CharT, RegexTraitsT>& Rx,
// Flags defaults to match_default.
312 match_flag_type Flags=match_default )
// Delegate to find_format with a regex finder and an empty_formatter
// constructed from Input.
314 ::boost::algorithm::find_format(
316 ::boost::algorithm::regex_finder( Rx, Flags ),
317 ::boost::algorithm::empty_formatter( Input ) );
320 // erase_all_regex --------------------------------------------------------------------//
322 //! Erase all regex algorithm
324 Erase all substrings matching the given regex from the input.
325 The result is a modified copy of the input. It is returned as a sequence
326 or copied to the output iterator.
329 \param Output An output iterator to which the result will be copied
330 \param Input An input string
331 \param Rx A regular expression
332 \param Flags Regex options
333 \return An output iterator pointing just after the last inserted character or
334 a modified copy of the input
336 \note The second variant of this function provides the strong exception-safety guarantee
// erase_all_regex_copy, output-iterator overload: takes the destination
// iterator by value and returns an OutputIteratorT.
// NOTE(review): the `template<` opener, some template parameters, the `Input`
// parameter line, the first arguments of the find_format_all_copy call and the
// braces are not visible in this listing.
339 typename OutputIteratorT,
342 typename RegexTraitsT >
343 inline OutputIteratorT erase_all_regex_copy(
344 OutputIteratorT Output,
346 const basic_regex<CharT, RegexTraitsT>& Rx,
// Flags defaults to match_default.
347 match_flag_type Flags=match_default )
// Delegate to find_format_all_copy with a regex finder and an empty_formatter
// constructed from Input.
349 return ::boost::algorithm::find_format_all_copy(
352 ::boost::algorithm::regex_finder( Rx, Flags ),
353 ::boost::algorithm::empty_formatter( Input ) );
356 //! Erase all regex algorithm
// erase_all_regex_copy, sequence overload: takes the input by const reference
// and returns a SequenceT by value.
// NOTE(review): the `template<` opener, leading template parameters, the first
// argument of the find_format_all_copy call and the braces are not visible in
// this listing.
363 typename RegexTraitsT >
364 inline SequenceT erase_all_regex_copy(
365 const SequenceT& Input,
366 const basic_regex<CharT, RegexTraitsT>& Rx,
// Flags defaults to match_default.
367 match_flag_type Flags=match_default )
// The result of find_format_all_copy is returned directly. The finder is built
// from Rx/Flags; the formatter is an empty_formatter constructed from Input.
369 return ::boost::algorithm::find_format_all_copy(
371 ::boost::algorithm::regex_finder( Rx, Flags ),
372 ::boost::algorithm::empty_formatter( Input ) );
375 //! Erase all regex algorithm
377 Erase all substrings matching the given regex from the input.
378 The input string is modified in-place.
380 \param Input An input string
381 \param Rx A regular expression
382 \param Flags Regex options
// erase_all_regex: in-place variant, returns void.
// NOTE(review): the `template<` opener, leading template parameters, the
// `Input` parameter line, the first argument of the find_format_all call and
// the braces are not visible in this listing.
387 typename RegexTraitsT>
388 inline void erase_all_regex(
390 const basic_regex<CharT, RegexTraitsT>& Rx,
// Flags defaults to match_default.
391 match_flag_type Flags=match_default )
// Delegate to find_format_all with a regex finder and an empty_formatter
// constructed from Input.
393 ::boost::algorithm::find_format_all(
395 ::boost::algorithm::regex_finder( Rx, Flags ),
396 ::boost::algorithm::empty_formatter( Input ) );
399 // find_all_regex ------------------------------------------------------------------//
401 //! Find all regex algorithm
403 This algorithm finds all substrings matching the given regex
406 Each part is copied and added as a new element to the output container.
407 Thus the result container must be able to hold copies
408 of the matches (in a compatible structure like std::string) or
409 a reference to it (e.g. using the iterator range class).
410 Examples of such a container are \c std::vector<std::string>
411 or \c std::list<boost::iterator_range<std::string::iterator>>
413 \param Result A container that can hold copies of, or references to, the substrings.
414 \param Input A container which will be searched.
415 \param Rx A regular expression
416 \param Flags Regex options
417 \return A reference to the result
419 \note Prior content of the result will be overwritten.
421 \note This function provides the strong exception-safety guarantee
// find_all_regex: takes the result container by non-const reference and
// returns a SequenceSequenceT& (documented above as a reference to the result).
// NOTE(review): the `template<` opener, some template parameters, the `Input`
// parameter line, the first arguments of the iter_find call and the braces are
// not visible in this listing.
424 typename SequenceSequenceT,
427 typename RegexTraitsT >
428 inline SequenceSequenceT& find_all_regex(
429 SequenceSequenceT& Result,
431 const basic_regex<CharT, RegexTraitsT>& Rx,
// Flags defaults to match_default.
432 match_flag_type Flags=match_default )
// Delegate to iter_find, supplying a regex-based finder built from Rx/Flags;
// the value iter_find returns is passed straight back to the caller.
434 return ::boost::algorithm::iter_find(
437 ::boost::algorithm::regex_finder(Rx,Flags) );
440 // split_regex ------------------------------------------------------------------//
442 //! Split regex algorithm
444 Tokenize the input. This function is equivalent to C strtok: the input
445 sequence is split into tokens separated by separators, where a separator
446 is every match of the given regex.
447 Each part is copied and added as a new element to the output container.
448 Thus the result container must be able to hold copies
449 of the matches (in a compatible structure like std::string) or
450 a reference to it (e.g. using the iterator range class).
451 Examples of such a container are \c std::vector<std::string>
452 or \c std::list<boost::iterator_range<std::string::iterator>>
454 \param Result A container that can hold copies of, or references to, the substrings.
455 \param Input A container which will be searched.
456 \param Rx A regular expression
457 \param Flags Regex options
458 \return A reference to the result
460 \note Prior content of the result will be overwritten.
462 \note This function provides the strong exception-safety guarantee
// split_regex: takes the result container by non-const reference and returns
// a SequenceSequenceT& (documented above as a reference to the result).
// NOTE(review): the `template<` opener, some template parameters, the `Input`
// parameter line, the first arguments of the iter_split call and the braces
// are not visible in this listing.
465 typename SequenceSequenceT,
468 typename RegexTraitsT >
469 inline SequenceSequenceT& split_regex(
470 SequenceSequenceT& Result,
472 const basic_regex<CharT, RegexTraitsT>& Rx,
// Flags defaults to match_default.
473 match_flag_type Flags=match_default )
// Same shape as find_all_regex, but delegates to iter_split instead of
// iter_find; the regex finder built from Rx/Flags locates the separators
// (per the documentation block above).
475 return ::boost::algorithm::iter_split(
478 ::boost::algorithm::regex_finder(Rx,Flags) );
481 // join_if ------------------------------------------------------------------//
483 #ifndef BOOST_NO_FUNCTION_TEMPLATE_ORDERING
485 //! Conditional join algorithm
487 This algorithm joins all strings in a 'list' into one long string.
488 Segments are concatenated by the given separator. Only segments that
489 match the given regular expression will be added to the result.
491 This is a specialization of the join_if algorithm.
493 \param Input A container that holds the input strings. It must be a container-of-containers.
494 \param Separator A string that will separate the joined segments.
495 \param Rx A regular expression
496 \param Flags Regex options
497 \return Concatenated string.
499 \note This function provides the strong exception-safety guarantee
// Regex overload of join_if (name taken from the section banner and the
// using-declaration at the end of the file); compiled only when
// BOOST_NO_FUNCTION_TEMPLATE_ORDERING is not defined.
// NOTE(review): the `template<` opener, some template parameters, the
// function-name line, the `while(` header, the declaration of `Result`, the
// final return and all braces are not visible in this listing.
502 typename SequenceSequenceT,
505 typename RegexTraitsT >
// The return type is the element type of the outer container.
506 inline typename range_value<SequenceSequenceT>::type
508 const SequenceSequenceT& Input,
509 const Range1T& Separator,
510 const basic_regex<CharT, RegexTraitsT>& Rx,
// Flags defaults to match_default.
511 match_flag_type Flags=match_default )
513 // Define working types
514 typedef typename range_value<SequenceSequenceT>::type ResultT;
515 typedef typename range_const_iterator<SequenceSequenceT>::type InputIteratorT;
// Iterate over the outer container; each *itBegin is one segment.
518 InputIteratorT itBegin=::boost::begin(Input);
519 InputIteratorT itEnd=::boost::end(Input);
521 // Construct container to hold the result
525 // Roll to the first element that will be added
// regex_match is applied to the segment's full [begin, end) range; segments
// for which it fails are stepped over.
528 !::boost::regex_match(::boost::begin(*itBegin), ::boost::end(*itBegin), Rx, Flags)) ++itBegin;
// Append the first accepted segment at the end of Result; no separator is
// inserted in front of it.
533 detail::insert(Result, ::boost::end(Result), *itBegin);
// Walk the remaining segments.
537 for(;itBegin!=itEnd; ++itBegin)
539 if(::boost::regex_match(::boost::begin(*itBegin), ::boost::end(*itBegin), Rx, Flags))
// For every further accepted segment: first the separator (passed through
// as_literal), then the segment itself, both appended at the end of Result.
542 detail::insert(Result, ::boost::end(Result), ::boost::as_literal(Separator));
544 detail::insert(Result, ::boost::end(Result), *itBegin);
551 #else // BOOST_NO_FUNCTION_TEMPLATE_ORDERING
553 //! Conditional join algorithm
555 This algorithm joins all strings in a 'list' into one long string.
556 Segments are concatenated by the given separator. Only segments that
557 match the given regular expression will be added to the result.
559 This is a specialization of the join_if algorithm.
561 \param Input A container that holds the input strings. It must be a container-of-containers.
562 \param Separator A string that will separate the joined segments.
563 \param Rx A regular expression
564 \param Flags Regex options
565 \return Concatenated string.
567 \note This function provides the strong exception-safety guarantee
// Fallback compiled when BOOST_NO_FUNCTION_TEMPLATE_ORDERING is defined;
// presumably named join_if_regex, going by the using-declaration at the end of
// the file -- the function-name line itself is not visible here.
// The visible statements are the same as in the join_if overload in the other
// preprocessor branch.
// NOTE(review): the `template<` opener, some template parameters, the `while(`
// header, the declaration of `Result`, the final return and all braces are
// also not visible in this listing.
570 typename SequenceSequenceT,
573 typename RegexTraitsT >
// The return type is the element type of the outer container.
574 inline typename range_value<SequenceSequenceT>::type
576 const SequenceSequenceT& Input,
577 const Range1T& Separator,
578 const basic_regex<CharT, RegexTraitsT>& Rx,
// Flags defaults to match_default.
579 match_flag_type Flags=match_default )
581 // Define working types
582 typedef typename range_value<SequenceSequenceT>::type ResultT;
583 typedef typename range_const_iterator<SequenceSequenceT>::type InputIteratorT;
// Iterate over the outer container; each *itBegin is one segment.
586 InputIteratorT itBegin=::boost::begin(Input);
587 InputIteratorT itEnd=::boost::end(Input);
589 // Construct container to hold the result
593 // Roll to the first element that will be added
// regex_match is applied to the segment's full [begin, end) range; segments
// for which it fails are stepped over.
596 !::boost::regex_match(::boost::begin(*itBegin), ::boost::end(*itBegin), Rx, Flags)) ++itBegin;
// Append the first accepted segment at the end of Result; no separator is
// inserted in front of it.
601 detail::insert(Result, ::boost::end(Result), *itBegin);
// Walk the remaining segments.
605 for(;itBegin!=itEnd; ++itBegin)
607 if(::boost::regex_match(::boost::begin(*itBegin), ::boost::end(*itBegin), Rx, Flags))
// For every further accepted segment: first the separator (passed through
// as_literal), then the segment itself, both appended at the end of Result.
610 detail::insert(Result, ::boost::end(Result), ::boost::as_literal(Separator));
612 detail::insert(Result, ::boost::end(Result), *itBegin);
620 #endif // BOOST_NO_FUNCTION_TEMPLATE_ORDERING
622 } // namespace algorithm
624 // pull names into the boost namespace
// Each algorithm declared in boost::algorithm above is re-exported so callers
// can use it without the algorithm:: qualifier.
625 using algorithm::find_regex;
626 using algorithm::replace_regex;
627 using algorithm::replace_regex_copy;
628 using algorithm::replace_all_regex;
629 using algorithm::replace_all_regex_copy;
630 using algorithm::erase_regex;
631 using algorithm::erase_regex_copy;
632 using algorithm::erase_all_regex;
633 using algorithm::erase_all_regex_copy;
634 using algorithm::find_all_regex;
635 using algorithm::split_regex;
// Exactly one of the two join names is exported, mirroring the conditional
// definition above: join_if when BOOST_NO_FUNCTION_TEMPLATE_ORDERING is not
// defined, join_if_regex when it is.
637 #ifndef BOOST_NO_FUNCTION_TEMPLATE_ORDERING
638 using algorithm::join_if;
639 #else // BOOST_NO_FUNCTION_TEMPLATE_ORDERING
640 using algorithm::join_if_regex;
641 #endif // BOOST_NO_FUNCTION_TEMPLATE_ORDERING
646 #endif // BOOST_STRING_REGEX_HPP