human_regex.bases.general_regex

All examples here are written for the human_regex.StringRegex variant, but they apply equally to the human_regex.BytesRegex variant if you use byte-strings (b"...") instead of strings ("...").

View Source

  1"""
  2All examples here are written for the `human_regex.StringRegex` variant, but
  3they apply equally to the `human_regex.BytesRegex` variant if you
  4use byte-strings (`b"..."`) instead of strings (`"..."`).
  5"""
  6
  7from abc import abstractmethod
  8from collections.abc import Iterable
  9from typing import Self
 10
 11from ..contracts.abstract_regex import AbstractRegex
 12
 13Text_Element = str | bytes
 14"""
 15@private
 16"""
 17
 18
 19class GeneralRegexBase(AbstractRegex):
 20    """
 21    Base class for `human_regex.StringRegex` and `human_regex.BytesRegex`.
 22    """
 23
 24    @classmethod
 25    @property
 26    @abstractmethod
 27    def EMPTY(cls) -> Text_Element:
 28        """
 29        @private
 30        """
 31        ...
 32
 33    @classmethod
 34    @property
 35    @abstractmethod
 36    def OPEN_CHAR_SET(cls) -> Text_Element:
 37        """
 38        @private
 39        """
 40        ...
 41
 42    @classmethod
 43    @property
 44    @abstractmethod
 45    def CLOSE_CHAR_SET(cls) -> Text_Element:
 46        """
 47        @private
 48        """
 49        ...
 50
 51    @classmethod
 52    @property
 53    @abstractmethod
 54    def OPEN_GROUP(cls) -> Text_Element:
 55        """
 56        @private
 57        """
 58        ...
 59
 60    @classmethod
 61    @property
 62    @abstractmethod
 63    def CLOSE_GROUP(cls) -> Text_Element:
 64        """
 65        @private
 66        """
 67        ...
 68
 69    @classmethod
 70    @property
 71    @abstractmethod
 72    def OPEN_EXTENSION(cls) -> Text_Element:
 73        """
 74        @private
 75        """
 76        ...
 77
 78    @classmethod
 79    @property
 80    @abstractmethod
 81    def CLOSE_EXTENSION(cls) -> Text_Element:
 82        """
 83        @private
 84        """
 85        ...
 86
 87    @classmethod
 88    @property
 89    @abstractmethod
 90    def OPEN_NAME(cls) -> Text_Element:
 91        """
 92        @private
 93        """
 94        ...
 95
 96    @classmethod
 97    @property
 98    @abstractmethod
 99    def CLOSE_NAME(cls) -> Text_Element:
100        """
101        @private
102        """
103        ...
104
105    @classmethod
106    @property
107    @abstractmethod
108    def OPEN_QUANTIFIER(cls) -> Text_Element:
109        """
110        @private
111        """
112        ...
113
114    @classmethod
115    @property
116    @abstractmethod
117    def CLOSE_QUANTIFIER(cls) -> Text_Element:
118        """
119        @private
120        """
121        ...
122
123    @classmethod
124    @property
125    @abstractmethod
126    def QUANTIFIER_SEPARATOR(cls) -> Text_Element:
127        """
128        @private
129        """
130        ...
131
132    @classmethod
133    @property
134    @abstractmethod
135    def OR(cls) -> Text_Element:
136        """
137        @private
138        """
139        ...
140
141    @classmethod
142    @property
143    @abstractmethod
144    def NO_CAPTURE(cls) -> Text_Element:
145        """
146        @private
147        """
148        ...
149
150    @classmethod
151    @property
152    @abstractmethod
153    def FLAGS_END(cls) -> Text_Element:
154        """
155        @private
156        """
157        ...
158
159    @classmethod
160    @property
161    @abstractmethod
162    def ATOMIC(cls) -> Text_Element:
163        """
164        @private
165        """
166        ...
167
168    @classmethod
169    @property
170    @abstractmethod
171    def NAME_REFERENCE(cls) -> Text_Element:
172        """
173        @private
174        """
175        ...
176
177    @classmethod
178    @property
179    @abstractmethod
180    def COMMENT(cls) -> Text_Element:
181        """
182        @private
183        """
184        ...
185
186    @classmethod
187    @property
188    @abstractmethod
189    def FOLLOWED_BY(cls) -> Text_Element:
190        """
191        @private
192        """
193        ...
194
195    @classmethod
196    @property
197    @abstractmethod
198    def NOT_FOLLOWED_BY(cls) -> Text_Element:
199        """
200        @private
201        """
202        ...
203
204    @classmethod
205    @property
206    @abstractmethod
207    def PRECEDED_BY(cls) -> Text_Element:
208        """
209        @private
210        """
211        ...
212
213    @classmethod
214    @property
215    @abstractmethod
216    def NOT_PRECEDED_BY(cls) -> Text_Element:
217        """
218        @private
219        """
220        ...
221
222    @classmethod
223    @property
224    @abstractmethod
225    def ZERO_OR_MORE(cls) -> Text_Element:
226        """
227        @private
228        """
229        ...
230
231    @classmethod
232    @property
233    @abstractmethod
234    def ONE_OR_MORE(cls) -> Text_Element:
235        """
236        @private
237        """
238        ...
239
240    @classmethod
241    @property
242    @abstractmethod
243    def OPTIONAL(cls) -> Text_Element:
244        """
245        @private
246        """
247        ...
248
249    @classmethod
250    @property
251    @abstractmethod
252    def LAZY(cls) -> Text_Element:
253        """
254        @private
255        """
256        ...
257
258    @classmethod
259    def concatenate(cls, elements: Iterable[Text_Element]) -> Self:
260        """
261        Concatenates items in *elements*. Returns `StringRegex` instance of the joined string.
262
263        ```py
264        from human_regex import StringRegex as Sre
265
266        sre = Sre.concatenate(("Hello", " ", "world"))
267        assert sre == Sre("Hello world")
268        # equivalent to the following:
269        sre = Sre("".join(("Hello", " ", "world")))
270        sre = Sre("").join(("Hello", " ", "world"))
271        sre = Sre("Hello") + " " + "world"
272        ```
273        """
274        str_or_bytes = str if str in cls.__mro__ else bytes
275        result = str_or_bytes(cls.EMPTY).join(elements)
276        return cls(result)
277
278    def __add__(self, other: Text_Element) -> Self:
279        """
280        @public
281        Support of the `+` operator. Returns a new `StringRegex` instance which has *other*
282        appended to the original `StringRegex` instance.
283
284        ```py
285        from human_regex import StringRegex as Sre
286
287        sre1 = Sre("abc")
288        sre2 = Sre("def")
289        sre = sre1 + sre2
290        assert sre == Sre("abcdef")
291
292        # identical to:
293        sre = Sre("abc") + "def"
294        ```
295        """
296        cls = type(self)
297        return cls.concatenate((self, other))
298
299    def __or__(self, other) -> Self:
300        """
301        @public
302        Support of the `|` operator. Returns a new `StringRegex` instance which is
303        the original instance joined with *other* using the `|` symbol.
304
305        ```py
306        from human_regex import StringRegex as Sre
307
308        sre1 = Sre("abc")
309        sre2 = Sre("def")
310        sre = sre1 | sre2
311        assert sre == Sre("abc|def")
312
313        # identical to:
314        sre = Sre("abc") | "def"
315        ```
316        """
317        cls = type(self)
318        return cls.concatenate((self, cls.OR, other))
319
320    def append(self, appendent: Text_Element) -> Self:
321        """
322        @public
323        Returns a new `StringRegex` instance which is the original instance
324        followed by *appendent*.
325
326        ```py
327        from human_regex import StringRegex as Sre
328
329        sre = Sre("pretty").append(" little").append(Sre(" angel"))
330        assert sre == Sre("pretty little angel")
331        ```
332        """
333        cls = type(self)
334        return cls.concatenate((self, appendent))
335
336    def prepend(self, prependent: Text_Element) -> Self:
337        """
338        @public
339        Returns a new `StringRegex` instance with *self*
340        following the *prependent*.
341
342        This is useful when an already defined `StringRegex` instance is used
343        as a building block of a more complex `StringRegex` and we need
344        to add some `str` instances before it.
345
346        ```py
347        from human_regex import StringRegex as Sre
348
349        party = Sre("Party")
350        # Intention: to add the strings "A Long-" and "Expected "
351        # before the `party` instance.
352        #
353        # The built-in str does not have an .append method, so this will fail:
354        # "A Long-".append("Expected ").append(party).
355        #
356        # If we use string addition, we would get a str instance
357        # rather than a StringRegex instance:
358        # s: str = "A Long-" + "Expected " + party
359        #
360        # To get a StringRegex instance, we can turn things around
361        # and instead prepend normal str instances to a StringRegex:
362        sre: Sre = party.prepend("Expected ").prepend("A Long-")
363        assert sre == Sre("A Long-Expected Party")
364
365        # Alternative: convert the starting element to StringRegex and add the rest:
366        sre: Sre = Sre("A Long-") + "Expected " + party
367        # Or concatenate:
368        sre: Sre = Sre.concatenate(("A Long-", "Expected ", party))
369        ```
370        """
371        cls = type(self)
372        return cls.concatenate((prependent, self))
373
374    def join(self, elements: Iterable[Text_Element]) -> Self:
375        # ruff: noqa: RUF002, E501
376        """
377        @public
378        Analogous to `str.join`, but produces instances of `StringRegex`:
379
380        ```py
381        from human_regex import StringRegex as Sre
382
383        sre = Sre(" ").join(("hello", "world"))
384        assert sre == Sre("hello world")
385        ```
386
387        A more complex example: A suboptimal regular expression for
388        parsing geographical longitude in the form *127° 36′ 52″ W*:
389
390        ```py
391        from human_regex import StringRegex as Sre
392
393        coordinates = Sre(" ").join((
394            Sre(r"\\d").repeat(1, 3).named("degrees").append("°"),
395            Sre(r"\\d").repeat(1, 2).named("minutes").append("′"),
396            Sre(r"\\d").repeat(1, 2).named("seconds").append("″"),
397            Sre("EW").set.named("direction"),
398        )).named("coordinates")
399        assert coordinates == r"(?P<coordinates>(?P<degrees>\\d{1,3})° (?P<minutes>\\d{1,2})′ (?P<seconds>\\d{1,2})″ (?P<direction>[EW]))"
400        ```
401        (The resulting regular expression is suboptimal because it would capture values of degrees, minutes, seconds which
402        are illegal or not part of the longitude notation norm, e.g. `190° 78′ 93″ E` would be a match.)
403        """
404        cls = type(self)
405        str_or_bytes = str if str in cls.__mro__ else bytes
406        result = str_or_bytes(self).join(elements)
407        return cls(result)
408
409    @property
410    def unnamed(self) -> Self:
411        """
412        @public
413        Creates an unnamed group with the contents of *self*.
414
415        ```py
416        from human_regex import StringRegex as Sre
417
418        number = Sre(r"\\d").one_or_more.unnamed
419        assert number == r"(\\d+)"
420        sre = number.prepend("My favorite number is ").append(r"\\.")
421        assert sre == r"My favorite number is (\\d+)\\."
422        ```
423        """
424        cls = type(self)
425        return cls.concatenate((cls.OPEN_GROUP, self, cls.CLOSE_GROUP))
426
427    @property
428    def extension(self) -> Self:
429        """
430        @public
431        Returns a new `StringRegex` instance with *self* inside the extension notation `(?...)`:
432
433        ```py
434        from human_regex import StringRegex as Sre
435
436        sre: Sre = Sre("something").extension
437        assert sre == "(?something)"
438        ```
439        """
440        cls = type(self)
441        return cls.concatenate((cls.OPEN_EXTENSION, self, cls.CLOSE_EXTENSION))
442
443    @classmethod
444    def set_flags(cls, flags: Text_Element) -> Self:
445        """
446        @public
447        A way of encoding regular expression flags into the expression string itself,
448        rather than passing them as the *flag* argument to `StringRegex.compile` or other methods.
449        This should only be used at the start of a regular expression.
450
451        Returns a `StringRegex` instance which sets the given *flags*. Flags can be one
452        or more letters from the set `a`, `i`, `L`, `m`, `s`, `u`, `x`.
453        Refer to the documentation of [re](https://docs.python.org/library/re.html),
454        search there for "aiLmsux".
455
456        ```py
457        from human_regex import StringRegex as Sre
458
459        # Create an expression with Sre.MULTILINE | Sre.IGNORECASE
460        sre = Sre.set_flags("mi").append("match.this")
461        assert sre == "(?mi)match.this"
462        ```
463        """
464        return cls(flags).extension
465
466    @property
467    def no_capture(self) -> Self:
468        """
469        @public
470        Returns a new `StringRegex` with a non-capturing group made of *self*:
471
472        ```py
473        from human_regex import StringRegex as Sre
474
475        sre = Sre("match").no_capture
476        assert sre == "(?:match)"
477        ```
478        """
479        cls = type(self)
480        return cls.concatenate((cls.NO_CAPTURE, self)).extension
481
482    def modify_flags(self, flags: Text_Element) -> Self:
483        """
484        @public
485        Allows you to set different flags for a part of a more complex expression.
486
487        Returns a new `StringRegex` instance with *self* inside the
488        modify-flags-extension with flags modified as specified by the
489        *flags* argument. Flags can be one
490        or more letters from the set `a`, `i`, `L`, `m`, `s`, `u`, `x`, optionally
491        followed by "-" followed by one or more letters from the `i`, `m`, `s`, `x` set.
492
493
494        Refer to the documentation of [re](https://docs.python.org/library/re.html),
495        search there for "aiLmsux-imsx".
496
497        ```py
498        from human_regex import StringRegex as Sre
499        part1 = Sre.set_flags("mi").append("multiline.and.ignore.case.here")
500        assert part1 == "(?mi)multiline.and.ignore.case.here"
501        part2 = Sre("add.dot.all.but.do.NOT.ignore.case.HERE.and.no.multiline").modify_flags("s-im")
502        assert part2 == "(?s-im:add.dot.all.but.do.NOT.ignore.case.HERE.and.no.multiline)"
503        part3 = "again.multiline.and.ignore.case.here"
504        sre = Sre.concatenate((part1, part2, part3))
505        assert sre == "(?mi)multiline.and.ignore.case.here(?s-im:add.dot.all.but.do.NOT.ignore.case.HERE.and.no.multiline)again.multiline.and.ignore.case.here"
506        ```
507        """
508        cls = type(self)
509        extension_core = cls.concatenate((flags, cls.FLAGS_END, self))
510        return extension_core.extension
511
512    @property
513    def atomic(self) -> Self:
514        """
515        @public
516        Returns a new `StringRegex` instance with *self* as the content of an *atomic group*.
517
518        ```py
519        from human_regex import StringRegex as Sre
520
521        sre = Sre("content").atomic
522        assert sre == "(?>content)"
523        ```
524        """
525        cls = type(self)
526        return cls.concatenate((cls.ATOMIC, self)).extension
527
528    def named(self, name: Text_Element) -> Self:
529        """
530        @public
531        Returns a new `StringRegex` instance with *self* as the content of a group named *name*.
532
533        ```py
534        from human_regex import StringRegex as Sre
535
536        word = Sre(r"\\w").one_or_more # \\w+
537        burger = word.named("burger")
538        assert burger == r"(?P<burger>\\w+)"
539        extra = word.named("extra")
540        assert extra == r"(?P<extra>\\w+)"
541
542        sre = burger + " with " + extra
543        assert sre == r"(?P<burger>\\w+) with (?P<extra>\\w+)"
544
545        match = sre.match("quarterpounder with cheese")
546        assert match.group("burger") == "quarterpounder"
547        assert match.group("extra") == "cheese"
548        ```
549        """
550        cls = type(self)
551        label = cls.concatenate((cls.OPEN_NAME, name, cls.CLOSE_NAME))
552        return cls.concatenate((label, self)).extension
553
554    @property
555    def backreference(self) -> Self:
556        """
557        @public
558        Returns a `StringRegex` instance with *self* as the name of the group being referred back to.
559
560        ```py
561        from human_regex import StringRegex as Sre
562
563        word = Sre(r"\\w").one_or_more # \\w+
564        old_ruler = word.named("ruler") # (?P<ruler>\\w+)
565        new_ruler = Sre("ruler").backreference # (?P=ruler)
566        sre = Sre(" ").join(("The", old_ruler, "is dead, long live the", new_ruler.append("!")))
567
568        assert sre == r"The (?P<ruler>\\w+) is dead, long live the (?P=ruler)!"
569        text = "The king is dead, long live the king!"
570        assert sre.match(text)
571        ```
572        """
573        cls = type(self)
574        return cls.concatenate((cls.NAME_REFERENCE, self)).extension
575
576    @property
577    def comment(self) -> Self:
578        """
579        @public
580        Returns a new `StringRegex` with *self* as a comment.
581
582        ```py
583        from human_regex import StringRegex as Sre
584
585        sre = Sre(r"0-9a-f").set + Sre("any hex digit").comment
586        assert sre == "[0-9a-f](?#any hex digit)"
587        ```
588        """
589        cls = type(self)
590        return cls.concatenate((cls.COMMENT, self)).extension
591
592    def followed_by(self, following: Text_Element) -> Self:
593        """
594        @public
595        Returns a new `StringRegex` with *self* extended by *following* as the *positive lookahead assertion*.
596
597        ```py
598        from human_regex import StringRegex as Sre
599
600        sre = Sre("Isaac ").followed_by("Asimov")
601        assert sre == "Isaac (?=Asimov)"
602        ```
603        """
604        cls = type(self)
605        follows = cls.concatenate((cls.FOLLOWED_BY, following)).extension
606        return cls.concatenate((self, follows))
607
608    def not_followed_by(self, not_following: Text_Element) -> Self:
609        """
610        @public
611        Returns a new `StringRegex` with *self* extended by *not_following* as the *negative lookahead assertion*.
612
613        ```py
614        from human_regex import StringRegex as Sre
615
616        sre = Sre("Isaac ").not_followed_by("Asimov")
617        assert sre == "Isaac (?!Asimov)"
618        ```
619        """
620        cls = type(self)
621        does_not_follow = cls.concatenate((cls.NOT_FOLLOWED_BY, not_following)).extension
622        return cls.concatenate((self, does_not_follow))
623
624    def preceded_by(self, preceding: Text_Element) -> Self:
625        """
626        @public
627        Returns a new `StringRegex` with *self* extended by *preceding* as the *positive lookbehind assertion*.
628
629        ```py
630        from human_regex import StringRegex as Sre
631
632        sre = Sre("chat").preceded_by("chit")
633        assert sre == "(?<=chit)chat"
634        ```
635        """
636        cls = type(self)
637        precedes = cls.concatenate((cls.PRECEDED_BY, preceding)).extension
638        return cls.concatenate((precedes, self))
639
640    def not_preceded_by(self, not_preceding: Text_Element) -> Self:
641        """
642        @public
643        Returns a new `StringRegex` with *self* extended by *not_preceding* as the *negative lookbehind assertion*.
644
645        ```py
646        from human_regex import StringRegex as Sre
647
648        sre = Sre("chat").not_preceded_by("chit")
649        assert sre == "(?<!chit)chat"
650        ```
651        """
652        cls = type(self)
653        does_not_precede = cls.concatenate((cls.NOT_PRECEDED_BY, not_preceding)).extension
654        return cls.concatenate((does_not_precede, self))
655
656    @classmethod
657    def yes_no(cls, id_name: int | Text_Element, yes: Text_Element, no: Text_Element | None = None) -> Self:
658        """
659        @public
660        Constructs the *yes-no-pattern* which will match with *yes*-pattern
661        if the group with given *id_name* exists, and with *no*-pattern if it doesn't.
662        *no*-pattern is optional and can be omitted. *id_name* can be the number
663        of the group or the name of the group if the group was named.
664
665        Example: Recreating the expression `(<)?(\\w+@\\w+(?:\\.\\w+)+)(?(1)>|$)`
666        from the built-in documentation of [re](https://docs.python.org/library/re.html)
667        for a poor email matching pattern, which will match with `<user@host.com>` as well as
668        `user@host.com`, but not with `<user@host.com` nor `user@host.com>`:
669
670        ```py
671        from human_regex import StringRegex as Sre
672
673        word = Sre(r"\\w").one_or_more # \\w+
674        mail_core = (
675            word +
676            "@" +
677            word +
678            word.prepend(r"\\.").no_capture.one_or_more
679        ).unnamed # (\\w+@\\w+(?:\\.\\w+)+)
680        maybe_less_than = Sre("<").unnamed.optional # (<)?
681        maybe_greater_than = Sre.yes_no(1, ">", "$") # (?(1)>|$)
682        mail_re = maybe_less_than + mail_core + maybe_greater_than
683
684        assert mail_re == r"(<)?(\\w+@\\w+(?:\\.\\w+)+)(?(1)>|$)"
685        ```
686        """
687        id_name = cls._convert_to_bytes_or_string(id_name) if isinstance(id_name, int) else id_name
688        result = cls.concatenate((cls(id_name).unnamed, yes))
689        if no is not None:
690            result += cls.concatenate((cls.OR, no))
691        return result.extension
692
693    @classmethod
694    def _convert_to_bytes_or_string(cls, i: int) -> Text_Element:
695        i = str(i)
696        if bytes in cls.__mro__:
697            i = i.encode()
698        return i
699
700    @property
701    def set(self) -> Self:
702        """
703        @public
704        Returns a new `StringRegex` with *self* enclosed in a character set (`[...]`).
705
706        ```py
707        from human_regex import StringRegex as Sre
708
709        hex_digits = Sre("a-f0-9").set
710        assert hex_digits == "[a-f0-9]"
711        ```
712        """
713        cls = type(self)
714        return cls.concatenate((cls.OPEN_CHAR_SET, self, cls.CLOSE_CHAR_SET))
715
716    @property
717    def optional(self) -> Self:
718        """
719        @public
720        Returns a new `StringRegex` with `?` appended to *self*.
721
722        ```py
723        from human_regex import StringRegex as Sre
724
725        character = Sre(r"\\w")
726        optional_character = character.optional
727        assert optional_character == r"\\w?"
728        ```
729        """
730        cls = type(self)
731        return cls.concatenate((self, cls.OPTIONAL))
732
733    @property
734    def zero_or_more(self) -> Self:
735        """
736        @public
737        Returns a new `StringRegex` with `*` appended to *self*.
738
739        ```py
740        from human_regex import StringRegex as Sre
741
742        digit = Sre(r"\\d")
743        maybe_digits = digit.zero_or_more
744        assert maybe_digits == r"\\d*"
745        ```
746        """
747        cls = type(self)
748        return cls.concatenate((self, cls.ZERO_OR_MORE))
749
750    @property
751    def one_or_more(self) -> Self:
752        """
753        @public
754        Returns a new `StringRegex` with `+` appended to *self*.
755
756        ```py
757        from human_regex import StringRegex as Sre
758
759        digit = Sre(r"\\d")
760        some_digits = digit.one_or_more
761        assert some_digits == r"\\d+"
762        ```
763        """
764        cls = type(self)
765        return cls.concatenate((self, cls.ONE_OR_MORE))
766
767    @property
768    def lazy(self) -> Self:
769        """
770        @public
771        Returns a new `StringRegex` with `?` appended to *self*, which makes a preceding quantifier lazy (non-greedy).
772
773        ```py
774        from human_regex import StringRegex as Sre
775
776        everything = Sre(".*")
777        assert everything.lazy == ".*?"
778
779        tab = Sre(r"\\t")
780        everything_before_first_tab = everything.prepend("^").lazy.named("before_tab").append(tab)
781        assert everything_before_first_tab == r"(?P<before_tab>^.*?)\\t"
782        ```
783        """
784        cls = type(self)
785        return cls.concatenate((self, cls.LAZY))
786
787    def repeat(self, minimum, maximum, /) -> Self:
788        """
789        @public
790        Returns a new `StringRegex` with a greedy quantifier appended to *self*.
791        *minimum* and *maximum* specify the limits of repetition. Both must be passed, but either may be `None` to leave that limit open.
792
793        ```py
794        from human_regex import StringRegex as Sre
795
796        two_or_more_As = Sre("A").repeat(2, None)
797        assert two_or_more_As == "A{2,}"
798        two_to_four_As = Sre("A").repeat(2, 4)
799        assert two_to_four_As == "A{2,4}"
800        up_to_four_As = Sre("A").repeat(None, 4)
801        assert up_to_four_As == "A{,4}"
802        ```
803        """
804        cls = type(self)
805        minimum = cls._convert_to_bytes_or_string(minimum) if minimum is not None else cls.EMPTY
806        maximum = cls._convert_to_bytes_or_string(maximum) if maximum is not None else cls.EMPTY
807        return cls.concatenate(
808            (self, cls.OPEN_QUANTIFIER, minimum, cls.QUANTIFIER_SEPARATOR, maximum, cls.CLOSE_QUANTIFIER)
809        )
810
811    def exactly(self, number: int) -> Self:
812        """
813        @public
814        Returns a new `StringRegex` with a fixed quantifier of *number* appended to *self*.
815
816        ```py
817        from human_regex import StringRegex as Sre
818
819        three_As = Sre("A").exactly(3)
820        assert three_As == "A{3}"
821        ```
822        """
823        cls = type(self)
824        number = cls._convert_to_bytes_or_string(number) if number else cls.EMPTY
825        return cls.concatenate((self, cls.OPEN_QUANTIFIER, number, cls.CLOSE_QUANTIFIER))

class GeneralRegexBase(human_regex.contracts.abstract_regex.AbstractRegex): View Source

 20class GeneralRegexBase(AbstractRegex):
 21    """
 22    Base class for `human_regex.StringRegex` and `human_regex.BytesRegex`.
 23    """
 24
 25    @classmethod
 26    @property
 27    @abstractmethod
 28    def EMPTY(cls) -> Text_Element:
 29        """
 30        @private
 31        """
 32        ...
 33
 34    @classmethod
 35    @property
 36    @abstractmethod
 37    def OPEN_CHAR_SET(cls) -> Text_Element:
 38        """
 39        @private
 40        """
 41        ...
 42
 43    @classmethod
 44    @property
 45    @abstractmethod
 46    def CLOSE_CHAR_SET(cls) -> Text_Element:
 47        """
 48        @private
 49        """
 50        ...
 51
 52    @classmethod
 53    @property
 54    @abstractmethod
 55    def OPEN_GROUP(cls) -> Text_Element:
 56        """
 57        @private
 58        """
 59        ...
 60
 61    @classmethod
 62    @property
 63    @abstractmethod
 64    def CLOSE_GROUP(cls) -> Text_Element:
 65        """
 66        @private
 67        """
 68        ...
 69
 70    @classmethod
 71    @property
 72    @abstractmethod
 73    def OPEN_EXTENSION(cls) -> Text_Element:
 74        """
 75        @private
 76        """
 77        ...
 78
 79    @classmethod
 80    @property
 81    @abstractmethod
 82    def CLOSE_EXTENSION(cls) -> Text_Element:
 83        """
 84        @private
 85        """
 86        ...
 87
 88    @classmethod
 89    @property
 90    @abstractmethod
 91    def OPEN_NAME(cls) -> Text_Element:
 92        """
 93        @private
 94        """
 95        ...
 96
 97    @classmethod
 98    @property
 99    @abstractmethod
100    def CLOSE_NAME(cls) -> Text_Element:
101        """
102        @private
103        """
104        ...
105
106    @classmethod
107    @property
108    @abstractmethod
109    def OPEN_QUANTIFIER(cls) -> Text_Element:
110        """
111        @private
112        """
113        ...
114
115    @classmethod
116    @property
117    @abstractmethod
118    def CLOSE_QUANTIFIER(cls) -> Text_Element:
119        """
120        @private
121        """
122        ...
123
124    @classmethod
125    @property
126    @abstractmethod
127    def QUANTIFIER_SEPARATOR(cls) -> Text_Element:
128        """
129        @private
130        """
131        ...
132
133    @classmethod
134    @property
135    @abstractmethod
136    def OR(cls) -> Text_Element:
137        """
138        @private
139        """
140        ...
141
142    @classmethod
143    @property
144    @abstractmethod
145    def NO_CAPTURE(cls) -> Text_Element:
146        """
147        @private
148        """
149        ...
150
151    @classmethod
152    @property
153    @abstractmethod
154    def FLAGS_END(cls) -> Text_Element:
155        """
156        @private
157        """
158        ...
159
160    @classmethod
161    @property
162    @abstractmethod
163    def ATOMIC(cls) -> Text_Element:
164        """
165        @private
166        """
167        ...
168
169    @classmethod
170    @property
171    @abstractmethod
172    def NAME_REFERENCE(cls) -> Text_Element:
173        """
174        @private
175        """
176        ...
177
178    @classmethod
179    @property
180    @abstractmethod
181    def COMMENT(cls) -> Text_Element:
182        """
183        @private
184        """
185        ...
186
187    @classmethod
188    @property
189    @abstractmethod
190    def FOLLOWED_BY(cls) -> Text_Element:
191        """
192        @private
193        """
194        ...
195
196    @classmethod
197    @property
198    @abstractmethod
199    def NOT_FOLLOWED_BY(cls) -> Text_Element:
200        """
201        @private
202        """
203        ...
204
205    @classmethod
206    @property
207    @abstractmethod
208    def PRECEDED_BY(cls) -> Text_Element:
209        """
210        @private
211        """
212        ...
213
214    @classmethod
215    @property
216    @abstractmethod
217    def NOT_PRECEDED_BY(cls) -> Text_Element:
218        """
219        @private
220        """
221        ...
222
223    @classmethod
224    @property
225    @abstractmethod
226    def ZERO_OR_MORE(cls) -> Text_Element:
227        """
228        @private
229        """
230        ...
231
232    @classmethod
233    @property
234    @abstractmethod
235    def ONE_OR_MORE(cls) -> Text_Element:
236        """
237        @private
238        """
239        ...
240
241    @classmethod
242    @property
243    @abstractmethod
244    def OPTIONAL(cls) -> Text_Element:
245        """
246        @private
247        """
248        ...
249
250    @classmethod
251    @property
252    @abstractmethod
253    def LAZY(cls) -> Text_Element:
254        """
255        @private
256        """
257        ...
258
259    @classmethod
260    def concatenate(cls, elements: Iterable[Text_Element]) -> Self:
261        """
262        Concatenates items in *elements*. Returns `StringRegex` instance of the joined string.
263
264        ```py
265        from human_regex import StringRegex as Sre
266
267        sre = Sre.concatenate(("Hello", " ", "world"))
268        assert sre == Sre("Hello world")
269        # equivalent to the following:
270        sre = Sre("".join(("Hello", " ", "world")))
271        sre = Sre("").join(("Hello", " ", "world"))
272        sre = Sre("Hello") + " " + "world"
273        ```
274        """
275        str_or_bytes = str if str in cls.__mro__ else bytes
276        result = str_or_bytes(cls.EMPTY).join(elements)
277        return cls(result)
278
279    def __add__(self, other: Text_Element) -> Self:
280        """
281        @public
282        Support of the `+` operator. Returns a new `StringRegex` instance which has *other*
283        appended to the original `StringRegex` instance.
284
285        ```py
286        from human_regex import StringRegex as Sre
287
288        sre1 = Sre("abc")
289        sre2 = Sre("def")
290        sre = sre1 + sre2
291        assert sre == Sre("abcdef")
292
293        # identical to:
294        sre = Sre("abc") + "def"
295        ```
296        """
297        cls = type(self)
298        return cls.concatenate((self, other))
299
    def __or__(self, other: Text_Element) -> Self:
        """
        @public
        Support of the `|` operator. Returns a new `StringRegex` instance which is
        the original instance joined with *other* using the `|` symbol
        (the class-level `OR` token is placed between the two operands).

        ```py
        from human_regex import StringRegex as Sre

        sre1 = Sre("abc")
        sre2 = Sre("def")
        sre = sre1 | sre2
        assert sre == Sre("abc|def")

        # identical to:
        sre = Sre("abc") | "def"
        ```
        """
        cls = type(self)
        return cls.concatenate((self, cls.OR, other))
320
321    def append(self, appendent: Text_Element) -> Self:
322        """
323        @public
324        Returns a new `StringRegex` instance which is the original instance
325        followed by *appendent*.
326
327        ```py
328        from human_regex import StringRegex as Sre
329
330        sre = Sre("pretty").append(" little").append(Sre(" angel"))
331        assert sre == Sre("pretty little angel")
332        ```
333        """
334        cls = type(self)
335        return cls.concatenate((self, appendent))
336
    def prepend(self, prependent: Text_Element) -> Self:
        """
        @public
        Returns a new `StringRegex` instance with *self*
        following the *prependent*.

        This is useful when an already defined `StringRegex` instance is used
        as a building block of a more complex `StringRegex` and we need
        to add some `str` instances before it.

        ```py
        from human_regex import StringRegex as Sre

        party = Sre("Party")
        # Intention: to add the strings "A Long-" and "Expected "
        # before the `party` instance.
        #
        # The built-in str does not have an .append method, so this will fail:
        # "A Long-".append("Expected ").append(party).
        #
        # If we use string addition, we would get a str instance
        # rather than a StringRegex instance:
        # s: str = "A Long-" + "Expected " + party
        #
        # To get a StringRegex instance, we can turn things around
        # and instead prepend normal str instances to a StringRegex:
        sre: Sre = party.prepend("Expected ").prepend("A Long-")
        assert sre == Sre("A Long-Expected Party")

        # Alternative: convert the starting element to StringRegex and add the rest:
        sre: Sre = Sre("A Long-") + "Expected " + party
        # Or concatenate:
        sre: Sre = Sre.concatenate(("A Long-", "Expected ", party))
        ```
        """
        cls = type(self)
        # *prependent* goes first so that the result starts with it.
        return cls.concatenate((prependent, self))
374
375    def join(self, elements: Iterable[Text_Element]) -> Self:
376        # ruff: noqa: RUF002, E501
377        """
378        @public
379        Analogous to `str.join`, but produces instances of `StringRegex`:
380
381        ```py
382        from human_regex import StringRegex as Sre
383
384        sre = Sre(" ").join(("hello", "world"))
385        assert sre == Sre("hello world")
386        ```
387
388        A more complex example: A suboptimal regular expression for
389        parsing geographical longitude in the form *127° 36′ 52″ W*:
390
391        ```py
392        from human_regex import StringRegex as Sre
393
394        coordinates = Sre(" ").join((
395            Sre(r"\\d").repeat(1, 3).named("degrees").append("°"),
396            Sre(r"\\d").repeat(1, 2).named("minutes").append("′"),
397            Sre(r"\\d").repeat(1, 2).named("seconds").append("″"),
398            Sre("EW").set.named("direction"),
399        )).named("coordinates")
400        assert coordinates == r"(?P<coordinates>(?P<degrees>\\d{1,3})° (?P<minutes>\\d{1,2})′ (?P<seconds>\\d{1,2})″ (?P<direction>[EW]))"
401        ```
402        (The resulting regular expression is suboptimal because it would capture values of degrees, minutes, seconds which
403        are illegal or not part of the longitude notation norm, e.g. `190° 78′ 93″ E` would be a match.)
404        """
405        cls = type(self)
406        str_or_bytes = str if str in cls.__mro__ else bytes
407        result = str_or_bytes(self).join(elements)
408        return cls(result)
409
410    @property
411    def unnamed(self) -> Self:
412        """
413        @public
414        Creates an unnamed group with the contents of *self*.
415
416        ```py
417        from human_regex import StringRegex as Sre
418
419        number = Sre(r"\\d").one_or_more.unnamed
420        assert number == "(\\d+)"
421        sre = number.prepend("My favorite number is ").append(r"\\.")
422        assert sre == "My favorite number is (\\d+)\\."
423        ```
424        """
425        cls = type(self)
426        return cls.concatenate((cls.OPEN_GROUP, self, cls.CLOSE_GROUP))
427
428    @property
429    def extension(self) -> Self:
430        """
431        @public
432        Returns a new `StringRegex` instance with *self* inside the extension notation `(?...)`:
433
434        ```py
435        from human_regex import StringRegex as Sre
436
437        sre: Sre = Sre("something").extension
438        assert sre == "(?something)"
439        ```
440        """
441        cls = type(self)
442        return cls.concatenate((cls.OPEN_EXTENSION, self, cls.CLOSE_EXTENSION))
443
444    @classmethod
445    def set_flags(cls, flags: Text_Element) -> Self:
446        """
447        @public
448        A way of encoding regular expression flags into the expression string itself,
449        rather than passing it as *flag* argument to the `StringRegex.compile` or other methods.
450        This should only be used at the start of a regular expression.
451
452        Returns a `StringRegex` instance which sets the given *flags*. Flags can be one
453        or more letters from the set `a`, `i`, `L`, `m`, `s`, `u`, `x`.
454        Refer to the documentation of [re](https://docs.python.org/library/re.html),
455        search there for "aiLmsux".
456
457        ```py
458        from human_regex import StringRegex as Sre
459
460        # Create an expression with Sre.MULTILINE | Sre.IGNORECASE
461        sre = Sre.set_flags("mi").append("match.this")
462        assert sre == "(?mi)match.this"
463        ```
464        """
465        return cls(flags).extension
466
467    @property
468    def no_capture(self) -> Self:
469        """
470        @public
471        Returns a new `StringRegex` with a non-capturing group made of *self*:
472
473        ```py
474        from human_regex import StringRegex as Sre
475
476        sre = Sre("match").no_capture
477        assert sre == "(?:match)"
478        ```
479        """
480        cls = type(self)
481        return cls.concatenate((cls.NO_CAPTURE, self)).extension
482
483    def modify_flags(self, flags: Text_Element) -> Self:
484        """
485        @public
486        Allows you to set different flags for a part of a more complex expression.
487
488        Returns a new `StringRegex` instance with *self* inside the
489        modify-flags-extension with flags modified as specified by the
490        *flags* argument. Flags can be one
491        or more letters from the set `a`, `i`, `L`, `m`, `s`, `u`, `x`, optionally
492        followed by "-" followed by one or more letters from the `i`, `m`, `s`, `x` set.
493
494
495        Refer to the documentation of [re](https://docs.python.org/library/re.html),
496        search there for "aiLmsux-imsx".
497
498        ```py
499        from human_regex import StringRegex as Sre
500        part1 = Sre.set_flags("mi").append("multiline.and.ignore.case.here")
501        assert part1 == "(?mi)multiline.and.ignore.case.here"
502        part2 = Sre("add.dot.all.but.do.NOT.ignore.case.HERE.and.no.multiline").modify_flags("s-im")
503        assert part2 == "(?s-im:add.dot.all.but.do.NOT.ignore.case.HERE.and.no.multiline)"
504        part3 = "again.multiline.and.ignore.case.here"
505        sre = Sre.concatenate((part1, part2, part3))
506        assert sre == "(?mi)multiline.and.ignore.case.here(?s-im:add.dot.all.but.do.NOT.ignore.case.HERE.and.no.multiline)again.multiline.and.ignore.case.here"
507        ```
508        """
509        cls = type(self)
510        extension_core = cls.concatenate((flags, cls.FLAGS_END, self))
511        return extension_core.extension
512
513    @property
514    def atomic(self) -> Self:
515        """
516        @public
517        Returns a new `StringRegex` instance with *self* as the content of an *atomic group*.
518
519        ```py
520        from human_regex import StringRegex as Sre
521
522        sre = Sre("content").atomic
523        assert sre == "(?>content)"
524        ```
525        """
526        cls = type(self)
527        return cls.concatenate((cls.ATOMIC, self)).extension
528
529    def named(self, name: Text_Element) -> Self:
530        """
531        @public
532        Returns a new `StringRegex` instance with *self* as the content of a group named *name*.
533
534        ```py
535        from human_regex import StringRegex as Sre
536
537        word = Sre(r"\\w").one_or_more # \\w+
538        burger = word.named("burger")
539        assert burger == r"(?P<burger>\\w+)"
540        extra = word.named("extra")
541        assert extra == r"(?P<extra>\\w+)"
542
543        sre = burger + " with " + extra
544        assert sre == r"(?P<burger>\\w+) with (?P<extra>\\w+)"
545
546        match = sre.match("quarterpounder with cheese")
547        assert match.group("burger") == "quarterpounder"
548        assert match.group("extra") == "cheese"
549        ```
550        """
551        cls = type(self)
552        label = cls.concatenate((cls.OPEN_NAME, name, cls.CLOSE_NAME))
553        return cls.concatenate((label, self)).extension
554
555    @property
556    def backreference(self) -> Self:
557        """
558        @public
559        Returns `StringRegex` instance with *self* as the name of the group being refered back to.
560
561        ```py
562        from human_regex import StringRegex as Sre
563
564        word = Sre(r"\\w").one_or_more # \\w+
565        old_ruler = word.named("ruler") # (?P<ruler>\\w+)
566        new_ruler = Sre("ruler").backreference # (?P=ruler)
567        sre = Sre(" ").join(("The", old_ruler, "is dead, long live the", new_ruler.append("!")))
568
569        assert sre == "The (?P<ruler>\\w+) is dead, long live the (?P=ruler)!"
570        text = "The king is dead, long live the king!"
571        assert sre.match(text)
572        ```
573        """
574        cls = type(self)
575        return cls.concatenate((cls.NAME_REFERENCE, self)).extension
576
577    @property
578    def comment(self) -> Self:
579        """
580        @public
581        Returns a new `StringRegex` with *self* as a comment.
582
583        ```py
584        from human_regex import StringRegex as Sre
585
586        sre = Sre(r"0-9a-f").set + Sre("any hex digit").comment
587        assert sre == "[0-9a-f](?#any hex digit)"
588        ```
589        """
590        cls = type(self)
591        return cls.concatenate((cls.COMMENT, self)).extension
592
593    def followed_by(self, following: Text_Element) -> Self:
594        """
595        @public
596        Returns a new `StringRegex` with *self* extended by *following* as the *positive lookahead assertion*.
597
598        ```py
599        from human_regex import StringRegex as Sre
600
601        sre = Sre("Isaac ").followed_by("Asimov")
602        assert sre == "Isaac (?=Asimov)"
603        ```
604        """
605        cls = type(self)
606        follows = cls.concatenate((cls.FOLLOWED_BY, following)).extension
607        return cls.concatenate((self, follows))
608
609    def not_followed_by(self, not_following: Text_Element) -> Self:
610        """
611        @public
612        Returns a new `StringRegex` with *self* extended by *not_following* as the *negative lookahead assertion*.
613
614        ```py
615        from human_regex import StringRegex as Sre
616
617        sre = Sre("Isaac ").not_followed_by("Asimov")
618        assert sre == "Isaac (?!Asimov)"
619        ```
620        """
621        cls = type(self)
622        does_not_follow = cls.concatenate((cls.NOT_FOLLOWED_BY, not_following)).extension
623        return cls.concatenate((self, does_not_follow))
624
625    def preceded_by(self, preceding: Text_Element) -> Self:
626        """
627        @public
628        Returns a new `StringRegex` with *self* extended by *preceding* as the *positive lookbehind assertion*.
629
630        ```py
631        from human_regex import StringRegex as Sre
632
633        sre = Sre("chat").preceded_by("chit")
634        assert sre == "(?<=chit)chat"
635        ```
636        """
637        cls = type(self)
638        precedes = cls.concatenate((cls.PRECEDED_BY, preceding)).extension
639        return cls.concatenate((precedes, self))
640
641    def not_preceded_by(self, not_preceding: Text_Element) -> Self:
642        """
643        @public
644        Returns a new `StringRegex` with *self* extended by *not_preceding* as the *negative lookbehind assertion*.
645
646        ```py
647        from human_regex import StringRegex as Sre
648
649        sre = Sre("chat").not_preceded_by("chit")
650        assert sre == "(?<!chit)chat"
651        ```
652        """
653        cls = type(self)
654        does_not_precede = cls.concatenate((cls.NOT_PRECEDED_BY, not_preceding)).extension
655        return cls.concatenate((does_not_precede, self))
656
657    @classmethod
658    def yes_no(cls, id_name: int | Text_Element, yes: Text_Element, no: Text_Element | None = None) -> Self:
659        """
660        @public
661        Constructs the *yes-no-pattern* which will match with *yes*-pattern
662        if the group with given *id_name* exists, and with *no*-pattern if it doesn't.
663        *no*-pattern is optional and can be omitted. *id_name* can be the number
664        of the group or the name of the group if the group was named.
665
666        Example: Recreating the expression `(<)?(\\w+@\\w+(?:\\.\\w+)+)(?(1)>|$)`
667        from the built-in documentation of [re](https://docs.python.org/library/re.html)
668        for a poor email matching pattern, which will match with `<user@host.com>` as well as
669        `user@host.com`, but not with `<user@host.com` nor `user@host.com>`:
670
671        ```py
672        from human_regex import StringRegex as Sre
673
674        word = Sre(r"\\w").one_or_more # \\w+
675        mail_core = (
676            word +
677            "@" +
678            word +
679            word.prepend(r"\\.").no_capture.one_or_more
680        ).unnamed # (\\w+@\\w+(?:\\.\\w+)+)
681        maybe_less_than = Sre("<").unnamed.optional # (<)?
682        maybe_greater_than = Sre.yes_no(1, ">", "$") # (?(1)>|$)
683        mail_re = maybe_less_than + mail_core + maybe_greater_than
684
685        assert mail_re == r"(<)?(\\w+@\\w+(?:\\.\\w+)+)(?(1)>|$)"
686        ```
687        """
688        id_name = cls._convert_to_bytes_or_string(id_name) if isinstance(id_name, int) else id_name
689        result = cls.concatenate((cls(id_name).unnamed, yes))
690        if no is not None:
691            result += cls.concatenate((cls.OR, no))
692        return result.extension
693
694    @classmethod
695    def _convert_to_bytes_or_string(cls, i: int) -> Text_Element:
696        i = str(i)
697        if bytes in cls.__mro__:
698            i = i.encode()
699        return i
700
701    @property
702    def set(self) -> Self:
703        """
704        @public
705        Returns a new `StringRegex` for a set of *self*.
706
707        ```py
708        from human_regex import StringRegex as Sre
709
710        hex_digits = Sre("a-f0-9").set
711        assert hex_digits == "[a-f0-9]"
712        ```
713        """
714        cls = type(self)
715        return cls.concatenate((cls.OPEN_CHAR_SET, self, cls.CLOSE_CHAR_SET))
716
717    @property
718    def optional(self) -> Self:
719        """
720        @public
721        Returns a new `StringRegex` with `?` appended to *self*.
722
723        ```py
724        from human_regex import StringRegex as Sre
725
726        character = Sre(r"\\w")
727        optional_character = character.optional
728        assert optional_character == r"\\w?"
729        ```
730        """
731        cls = type(self)
732        return cls.concatenate((self, cls.OPTIONAL))
733
734    @property
735    def zero_or_more(self) -> Self:
736        """
737        @public
738        Returns a new `StringRegex` with `*` appended to *self*.
739
740        ```py
741        from human_regex import StringRegex as Sre
742
743        digit = Sre(r"\\d")
744        maybe_digits = digit.zero_or_more
745        assert maybe_digits == r"\\d*"
746        ```
747        """
748        cls = type(self)
749        return cls.concatenate((self, cls.ZERO_OR_MORE))
750
751    @property
752    def one_or_more(self) -> Self:
753        """
754        @public
755        Returns a new `StringRegex` with `+` appended to *self*.
756
757        ```py
758        from human_regex import StringRegex as Sre
759
760        digit = Sre(r"\\d")
761        some_digits = digit.one_or_more
762        assert some_digits == r"\\d+"
763        ```
764        """
765        cls = type(self)
766        return cls.concatenate((self, cls.ONE_OR_MORE))
767
768    @property
769    def lazy(self) -> Self:
770        """
771        @public
772        Returns a new `StringRegex` with `?` appended to *self*.
773
774        ```py
775        from human_regex import StringRegex as Sre
776
777        everything = Sre(".*")
778        assert everything.lazy == ".*?"
779
780        tab = Sre(r"\\t")
781        everything_before_first_tab = everything.prepend("^").lazy.named("before_tab").append(tab)
782        assert everything_before_first_tab == r"(?P<before_tab>^.*?)\\t"
783        ```
784        """
785        cls = type(self)
786        return cls.concatenate((self, cls.LAZY))
787
788    def repeat(self, minimum, maximum, /) -> Self:
789        """
790        @public
791        Returns a new `StringRegex` with a greedy quantifier appended to *self*.
792        *minimum* and *maximum* specify limits of repetition. *maximum* is optional
793
794        ```py
795        from human_regex import StringRegex as Sre
796
797        two_or_more_As = Sre("A").repeat(2, None)
798        assert two_or_more_As == "A{2,}"
799        two_to_four_As = Sre("A").repeat(2, 4)
800        assert two_to_four_As == "A{2,4}"
801        up_to_four_As = Sre("A").repeat(None, 4)
802        assert up_to_four_As == "A{,4}"
803        ```
804        """
805        cls = type(self)
806        minimum = cls._convert_to_bytes_or_string(minimum) if minimum is not None else cls.EMPTY
807        maximum = cls._convert_to_bytes_or_string(maximum) if maximum is not None else cls.EMPTY
808        return cls.concatenate(
809            (self, cls.OPEN_QUANTIFIER, minimum, cls.QUANTIFIER_SEPARATOR, maximum, cls.CLOSE_QUANTIFIER)
810        )
811
812    def exactly(self, number: int) -> Self:
813        """
814        @public
815        Returns a new `StringRegex` with a fixed quantifier of *number* appended to *self*.
816
817        ```py
818        from human_regex import StringRegex as Sre
819
820        three_As = Sre("A").exactly(3)
821        assert three_As == "A{3}"
822        ```
823        """
824        cls = type(self)
825        number = cls._convert_to_bytes_or_string(number) if number else cls.EMPTY
826        return cls.concatenate((self, cls.OPEN_QUANTIFIER, number, cls.CLOSE_QUANTIFIER))

Base class for human_regex.StringRegex and human_regex.BytesRegex.

@classmethod

def concatenate(cls, elements: collections.abc.Iterable[str | bytes]) -> Self: View Source

259    @classmethod
260    def concatenate(cls, elements: Iterable[Text_Element]) -> Self:
261        """
262        Concatenates items in *elements*. Returns `StringRegex` instance of the joined string.
263
264        ```py
265        from human_regex import StringRegex as Sre
266
267        sre = Sre.concatenate(("Hello", " ", "world"))
268        assert sre == Sre("Hello world")
269        # equivalent to the following:
270        sre = Sre("".join(("Hello", " ", "world")))
271        sre = Sre("").join(("Hello", " ", "world"))
272        sre = Sre("Hello") + " " + "world"
273        ```
274        """
275        str_or_bytes = str if str in cls.__mro__ else bytes
276        result = str_or_bytes(cls.EMPTY).join(elements)
277        return cls(result)

Concatenates items in elements. Returns StringRegex instance of the joined string.

from human_regex import StringRegex as Sre

sre = Sre.concatenate(("Hello", " ", "world"))
assert sre == Sre("Hello world")
# equivalent to the following:
sre = Sre("".join(("Hello", " ", "world")))
sre = Sre("").join(("Hello", " ", "world"))
sre = Sre("Hello") + " " + "world"

def __add__(self, other: str | bytes) -> Self: View Source

279    def __add__(self, other: Text_Element) -> Self:
280        """
281        @public
282        Support of the `+` operator. Returns a new `StringRegex` instance which has *other*
283        appended to the original `StringRegex` instance.
284
285        ```py
286        from human_regex import StringRegex as Sre
287
288        sre1 = Sre("abc")
289        sre2 = Sre("def")
290        sre = sre1 + sre2
291        assert sre == Sre("abcdef")
292
293        # identical to:
294        sre = Sre("abc") + "def"
295        ```
296        """
297        cls = type(self)
298        return cls.concatenate((self, other))

Support of the + operator. Returns a new StringRegex instance which has other appended to the original StringRegex instance.

from human_regex import StringRegex as Sre

sre1 = Sre("abc")
sre2 = Sre("def")
sre = sre1 + sre2
assert sre == Sre("abcdef")

# identical to:
sre = Sre("abc") + "def"

def __or__(self, other) -> Self: View Source

300    def __or__(self, other) -> Self:
301        """
302        @public
303        Support of the `|` operator. Returns a new `StringRegex` instance which is
304        the original instance joined with *other* using the `|` symbol.
305
306        ```py
307        from human_regex import StringRegex as Sre
308
309        sre1 = Sre("abc")
310        sre2 = Sre("def")
311        sre = sre1 | sre2
312        assert sre == Sre("abc|def")
313
314        # identical to:
315        sre = Sre("abc") | "def"
316        ```
317        """
318        cls = type(self)
319        return cls.concatenate((self, cls.OR, other))

Support of the | operator. Returns a new StringRegex instance which is the original instance joined with other using the | symbol.

from human_regex import StringRegex as Sre

sre1 = Sre("abc")
sre2 = Sre("def")
sre = sre1 | sre2
assert sre == Sre("abc|def")

# identical to:
sre = Sre("abc") | "def"

def append(self, appendent: str | bytes) -> Self: View Source

321    def append(self, appendent: Text_Element) -> Self:
322        """
323        @public
324        Returns a new `StringRegex` instance which is the original instance
325        followed by *appendent*.
326
327        ```py
328        from human_regex import StringRegex as Sre
329
330        sre = Sre("pretty").append(" little").append(Sre(" angel"))
331        assert sre == Sre("pretty little angel")
332        ```
333        """
334        cls = type(self)
335        return cls.concatenate((self, appendent))

Returns a new StringRegex instance which is the original instance followed by appendent.

from human_regex import StringRegex as Sre

sre = Sre("pretty").append(" little").append(Sre(" angel"))
assert sre == Sre("pretty little angel")

def prepend(self, prependent: str | bytes) -> Self: View Source

337    def prepend(self, prependent: Text_Element) -> Self:
338        """
339        @public
340        Returns a new `StringRegex` instance with *self*
341        following the *prependent*.
342
343        This is useful when an already defined `StringRegex` instance is used
344        as a building block of a more complex `StringRegex` and we need
345        to add some `str` instances before it.
346
347        ```py
348        from human_regex import StringRegex as Sre
349
350        party = Sre("Party")
351        # Intention: to add the strings "A Long-" and "Expected "
352        # before the `party` instance.
353        #
354        # The built-in str does not have an .append method, so this will fail:
355        # "A Long-".append("Expected" ").append(party).
356        #
357        # If we use string addition, we would get a str instance
358        # rather than a StringRegex instance:
359        # s: str = "A Long-" + "Expected " + party
360        #
361        # To get a StringRegex instance, we can turn things around
362        # and instead prepend normal str instances to a StringRegex:
363        sre: Sre = party.prepend("Expected ").prepend("A Long-")
364        assert sre == Sre("A Long-Expected Party")
365
366        # Alternative: convert the starting element to StringRegex and add the rest:
367        sre: Sre = Sre("A Long-") + "Expected" + party
368        # Or concatenate:
369        sre: Sre = Sre.concatenate(("A Long-", "Expected ", party))
370        ```
371        """
372        cls = type(self)
373        return cls.concatenate((prependent, self))

Returns a new StringRegex instance with self following the prependent.

This is useful when an already defined StringRegex instance is used as a building block of a more complex StringRegex and we need to add some str instances before it.

from human_regex import StringRegex as Sre

party = Sre("Party")
# Intention: to add the strings "A Long-" and "Expected "
# before the `party` instance.
#
# The built-in str does not have an .append method, so this will fail:
# "A Long-".append("Expected ").append(party).
#
# If we use string addition, we would get a str instance
# rather than a StringRegex instance:
# s: str = "A Long-" + "Expected " + party
#
# To get a StringRegex instance, we can turn things around
# and instead prepend normal str instances to a StringRegex:
sre: Sre = party.prepend("Expected ").prepend("A Long-")
assert sre == Sre("A Long-Expected Party")

# Alternative: convert the starting element to StringRegex and add the rest:
sre: Sre = Sre("A Long-") + "Expected " + party
# Or concatenate:
sre: Sre = Sre.concatenate(("A Long-", "Expected ", party))

def join(self, elements: collections.abc.Iterable[str | bytes]) -> Self: View Source

375    def join(self, elements: Iterable[Text_Element]) -> Self:
376        # ruff: noqa: RUF002, E501
377        """
378        @public
379        Analogous to `str.join`, but produces instances of `StringRegex`:
380
381        ```py
382        from human_regex import StringRegex as Sre
383
384        sre = Sre(" ").join(("hello", "world"))
385        assert sre == Sre("hello world")
386        ```
387
388        A more complex example: A suboptimal regular expression for
389        parsing geographical longitude in the form *127° 36′ 52″ W*:
390
391        ```py
392        from human_regex import StringRegex as Sre
393
394        coordinates = Sre(" ").join((
395            Sre(r"\\d").repeat(1, 3).named("degrees").append("°"),
396            Sre(r"\\d").repeat(1, 2).named("minutes").append("′"),
397            Sre(r"\\d").repeat(1, 2).named("seconds").append("″"),
398            Sre("EW").set.named("direction"),
399        )).named("coordinates")
400        assert coordinates == r"(?P<coordinates>(?P<degrees>\\d{1,3})° (?P<minutes>\\d{1,2})′ (?P<seconds>\\d{1,2})″ (?P<direction>[EW]))"
401        ```
402        (The resulting regular expression is suboptimal because it would capture values of degrees, minutes, seconds which
403        are illegal or not part of the longitude notation norm, e.g. `190° 78′ 93″ E` would be a match.)
404        """
405        cls = type(self)
406        str_or_bytes = str if str in cls.__mro__ else bytes
407        result = str_or_bytes(self).join(elements)
408        return cls(result)

Analogous to str.join, but produces instances of StringRegex:

from human_regex import StringRegex as Sre

sre = Sre(" ").join(("hello", "world"))
assert sre == Sre("hello world")

A more complex example: A suboptimal regular expression for parsing geographical longitude in the form 127° 36′ 52″ W:

from human_regex import StringRegex as Sre

coordinates = Sre(" ").join((
    Sre(r"\d").repeat(1, 3).named("degrees").append("°"),
    Sre(r"\d").repeat(1, 2).named("minutes").append("′"),
    Sre(r"\d").repeat(1, 2).named("seconds").append("″"),
    Sre("EW").set.named("direction"),
)).named("coordinates")
assert coordinates == r"(?P<coordinates>(?P<degrees>\d{1,3})° (?P<minutes>\d{1,2})′ (?P<seconds>\d{1,2})″ (?P<direction>[EW]))"

(The resulting regular expression is suboptimal because it would capture values of degrees, minutes, seconds which are illegal or not part of the longitude notation norm, e.g. 190° 78′ 93″ E would be a match.)

unnamed: Self View Source

410    @property
411    def unnamed(self) -> Self:
412        """
413        @public
414        Creates an unnamed group with the contents of *self*.
415
416        ```py
417        from human_regex import StringRegex as Sre
418
419        number = Sre(r"\\d").one_or_more.unnamed
420        assert number == "(\\d+)"
421        sre = number.prepend("My favorite number is ").append(r"\\.")
422        assert sre == "My favorite number is (\\d+)\\."
423        ```
424        """
425        cls = type(self)
426        return cls.concatenate((cls.OPEN_GROUP, self, cls.CLOSE_GROUP))

Creates an unnamed group with the contents of self.

from human_regex import StringRegex as Sre

number = Sre(r"\d").one_or_more.unnamed
assert number == "(\d+)"
sre = number.prepend("My favorite number is ").append(r"\.")
assert sre == "My favorite number is (\d+)\."

extension: Self View Source

428    @property
429    def extension(self) -> Self:
430        """
431        @public
432        Returns a new `StringRegex` instance with *self* inside the extension notation `(?...)`:
433
434        ```py
435        from human_regex import StringRegex as Sre
436
437        sre: Sre = Sre("something").extension
438        assert sre == "(?something)"
439        ```
440        """
441        cls = type(self)
442        return cls.concatenate((cls.OPEN_EXTENSION, self, cls.CLOSE_EXTENSION))

Returns a new StringRegex instance with self inside the extension notation (?...):

from human_regex import StringRegex as Sre

sre: Sre = Sre("something").extension
assert sre == "(?something)"

@classmethod

def set_flags(cls, flags: str | bytes) -> Self: View Source

444    @classmethod
445    def set_flags(cls, flags: Text_Element) -> Self:
446        """
447        @public
448        A way of encoding regular expression flags into the expression string itself,
449        rather than passing it as *flag* argument to the `StringRegex.compile` or other methods.
450        This should only be used at the start of a regular expression.
451
452        Returns a `StringRegex` instance which sets the given *flags*. Flags can be one
453        or more letters from the set `a`, `i`, `L`, `m`, `s`, `u`, `x`.
454        Refer to the documentation of [re](https://docs.python.org/library/re.html),
455        search there for "aiLmsux".
456
457        ```py
458        from human_regex import StringRegex as Sre
459
460        # Create an expression with Sre.MULTILINE | Sre.IGNORECASE
461        sre = Sre.set_flags("mi").append("match.this")
462        assert sre == "(?mi)match.this"
463        ```
464        """
465        return cls(flags).extension

A way of encoding regular expression flags into the expression string itself, rather than passing it as flag argument to the StringRegex.compile or other methods. This should only be used at the start of a regular expression.

Returns a StringRegex instance which sets the given flags. Flags can be one or more letters from the set a, i, L, m, s, u, x. Refer to the documentation of re, search there for "aiLmsux".

from human_regex import StringRegex as Sre

# Create an expression with Sre.MULTILINE | Sre.IGNORECASE
sre = Sre.set_flags("mi").append("match.this")
assert sre == "(?mi)match.this"

no_capture: Self View Source

467    @property
468    def no_capture(self) -> Self:
469        """
470        @public
471        Returns a new `StringRegex` with a non-capturing group made of *self*:
472
473        ```py
474        from human_regex import StringRegex as Sre
475
476        sre = Sre("match").no_capture
477        assert sre == "(?:match)"
478        ```
479        """
480        cls = type(self)
481        return cls.concatenate((cls.NO_CAPTURE, self)).extension

Returns a new StringRegex with a non-capturing group made of self:

from human_regex import StringRegex as Sre

sre = Sre("match").no_capture
assert sre == "(?:match)"

def modify_flags(self, flags: str | bytes) -> Self: View Source

483    def modify_flags(self, flags: Text_Element) -> Self:
484        """
485        @public
486        Allows you to set different flags for a part of a more complex expression.
487
488        Returns a new `StringRegex` instance with *self* inside the
489        modify-flags-extension with flags modified as specified by the
490        *flags* argument. Flags can be one
491        or more letters from the set `a`, `i`, `L`, `m`, `s`, `u`, `x`, optionally
492        followed by "-" followed by one or more letters from the `i`, `m`, `s`, `x` set.
493
494
495        Refer to the documentation of [re](https://docs.python.org/library/re.html),
496        search there for "aiLmsux-imsx".
497
498        ```py
499        from human_regex import StringRegex as Sre
500        part1 = Sre.set_flags("mi").append("multiline.and.ignore.case.here")
501        assert part1 == "(?mi)multiline.and.ignore.case.here"
502        part2 = Sre("add.dot.all.but.do.NOT.ignore.case.HERE.and.no.multiline").modify_flags("s-im")
503        assert part2 == "(?s-im:add.dot.all.but.do.NOT.ignore.case.HERE.and.no.multiline)"
504        part3 = "again.multiline.and.ignore.case.here"
505        sre = Sre.concatenate((part1, part2, part3))
506        assert sre == "(?mi)multiline.and.ignore.case.here(?s-im:add.dot.all.but.do.NOT.ignore.case.HERE.and.no.multiline)again.multiline.and.ignore.case.here"
507        ```
508        """
509        cls = type(self)
510        extension_core = cls.concatenate((flags, cls.FLAGS_END, self))
511        return extension_core.extension

Allows you to set different flags for a part of a more complex expression.

Returns a new StringRegex instance with self inside the modify-flags-extension with flags modified as specified by the flags argument. Flags can be one or more letters from the set a, i, L, m, s, u, x, optionally followed by "-" followed by one or more letters from the i, m, s, x set.

Refer to the documentation of re, search there for "aiLmsux-imsx".

from human_regex import StringRegex as Sre
part1 = Sre.set_flags("mi").append("multiline.and.ignore.case.here")
assert part1 == "(?mi)multiline.and.ignore.case.here"
part2 = Sre("add.dot.all.but.do.NOT.ignore.case.HERE.and.no.multiline").modify_flags("s-im")
assert part2 == "(?s-im:add.dot.all.but.do.NOT.ignore.case.HERE.and.no.multiline)"
part3 = "again.multiline.and.ignore.case.here"
sre = Sre.concatenate((part1, part2, part3))
assert sre == "(?mi)multiline.and.ignore.case.here(?s-im:add.dot.all.but.do.NOT.ignore.case.HERE.and.no.multiline)again.multiline.and.ignore.case.here"

atomic: Self View Source

513    @property
514    def atomic(self) -> Self:
515        """
516        @public
517        Returns a new `StringRegex` instance with *self* as the content of an *atomic group*.
518
519        ```py
520        from human_regex import StringRegex as Sre
521
522        sre = Sre("content").atomic
523        assert sre == "(?>content)"
524        ```
525        """
526        cls = type(self)
527        return cls.concatenate((cls.ATOMIC, self)).extension

Returns a new StringRegex instance with self as the content of an atomic group.

from human_regex import StringRegex as Sre

sre = Sre("content").atomic
assert sre == "(?>content)"

def named(self, name: str | bytes) -> Self: View Source

529    def named(self, name: Text_Element) -> Self:
530        """
531        @public
532        Returns a new `StringRegex` instance with *self* as the content of a group named *name*.
533
534        ```py
535        from human_regex import StringRegex as Sre
536
537        word = Sre(r"\\w").one_or_more # \\w+
538        burger = word.named("burger")
539        assert burger == r"(?P<burger>\\w+)"
540        extra = word.named("extra")
541        assert extra == r"(?P<extra>\\w+)"
542
543        sre = burger + " with " + extra
544        assert sre == r"(?P<burger>\\w+) with (?P<extra>\\w+)"
545
546        match = sre.match("quarterpounder with cheese")
547        assert match.group("burger") == "quarterpounder"
548        assert match.group("extra") == "cheese"
549        ```
550        """
551        cls = type(self)
552        label = cls.concatenate((cls.OPEN_NAME, name, cls.CLOSE_NAME))
553        return cls.concatenate((label, self)).extension

Returns a new StringRegex instance with self as the content of a group named name.

from human_regex import StringRegex as Sre

word = Sre(r"\w").one_or_more # \w+
burger = word.named("burger")
assert burger == r"(?P<burger>\w+)"
extra = word.named("extra")
assert extra == r"(?P<extra>\w+)"

sre = burger + " with " + extra
assert sre == r"(?P<burger>\w+) with (?P<extra>\w+)"

match = sre.match("quarterpounder with cheese")
assert match.group("burger") == "quarterpounder"
assert match.group("extra") == "cheese"

backreference: Self View Source

555    @property
556    def backreference(self) -> Self:
557        """
558        @public
559        Returns `StringRegex` instance with *self* as the name of the group being refered back to.
560
561        ```py
562        from human_regex import StringRegex as Sre
563
564        word = Sre(r"\\w").one_or_more # \\w+
565        old_ruler = word.named("ruler") # (?P<ruler>\\w+)
566        new_ruler = Sre("ruler").backreference # (?P=ruler)
567        sre = Sre(" ").join(("The", old_ruler, "is dead, long live the", new_ruler.append("!")))
568
569        assert sre == "The (?P<ruler>\\w+) is dead, long live the (?P=ruler)!"
570        text = "The king is dead, long live the king!"
571        assert sre.match(text)
572        ```
573        """
574        cls = type(self)
575        return cls.concatenate((cls.NAME_REFERENCE, self)).extension

Returns a StringRegex instance with self as the name of the group being referred back to.

from human_regex import StringRegex as Sre

word = Sre(r"\w").one_or_more # \w+
old_ruler = word.named("ruler") # (?P<ruler>\w+)
new_ruler = Sre("ruler").backreference # (?P=ruler)
sre = Sre(" ").join(("The", old_ruler, "is dead, long live the", new_ruler.append("!")))

assert sre == "The (?P<ruler>\w+) is dead, long live the (?P=ruler)!"
text = "The king is dead, long live the king!"
assert sre.match(text)

comment: Self View Source

577    @property
578    def comment(self) -> Self:
579        """
580        @public
581        Returns a new `StringRegex` with *self* as a comment.
582
583        ```py
584        from human_regex import StringRegex as Sre
585
586        sre = Sre(r"0-9a-f").set + Sre("any hex digit").comment
587        assert sre == "[0-9a-f](?#any hex digit)"
588        ```
589        """
590        cls = type(self)
591        return cls.concatenate((cls.COMMENT, self)).extension

Returns a new StringRegex with self as a comment.

from human_regex import StringRegex as Sre

sre = Sre(r"0-9a-f").set + Sre("any hex digit").comment
assert sre == "[0-9a-f](?#any hex digit)"

def followed_by(self, following: str | bytes) -> Self: View Source

593    def followed_by(self, following: Text_Element) -> Self:
594        """
595        @public
596        Returns a new `StringRegex` with *self* extended by *following* as the *positive lookahead assertion*.
597
598        ```py
599        from human_regex import StringRegex as Sre
600
601        sre = Sre("Isaac ").followed_by("Asimov")
602        assert sre == "Isaac (?=Asimov)"
603        ```
604        """
605        cls = type(self)
606        follows = cls.concatenate((cls.FOLLOWED_BY, following)).extension
607        return cls.concatenate((self, follows))

Returns a new StringRegex with self extended by following as the positive lookahead assertion.

from human_regex import StringRegex as Sre

sre = Sre("Isaac ").followed_by("Asimov")
assert sre == "Isaac (?=Asimov)"

def not_followed_by(self, not_following: str | bytes) -> Self: View Source

609    def not_followed_by(self, not_following: Text_Element) -> Self:
610        """
611        @public
612        Returns a new `StringRegex` with *self* extended by *not_following* as the *negative lookahead assertion*.
613
614        ```py
615        from human_regex import StringRegex as Sre
616
617        sre = Sre("Isaac ").not_followed_by("Asimov")
618        assert sre == "Isaac (?!Asimov)"
619        ```
620        """
621        cls = type(self)
622        does_not_follow = cls.concatenate((cls.NOT_FOLLOWED_BY, not_following)).extension
623        return cls.concatenate((self, does_not_follow))

Returns a new StringRegex with self extended by not_following as the negative lookahead assertion.

from human_regex import StringRegex as Sre

sre = Sre("Isaac ").not_followed_by("Asimov")
assert sre == "Isaac (?!Asimov)"

def preceded_by(self, preceding: str | bytes) -> Self: View Source

625    def preceded_by(self, preceding: Text_Element) -> Self:
626        """
627        @public
628        Returns a new `StringRegex` with *self* extended by *preceding* as the *positive lookbehind assertion*.
629
630        ```py
631        from human_regex import StringRegex as Sre
632
633        sre = Sre("chat").preceded_by("chit")
634        assert sre == "(?<=chit)chat"
635        ```
636        """
637        cls = type(self)
638        precedes = cls.concatenate((cls.PRECEDED_BY, preceding)).extension
639        return cls.concatenate((precedes, self))

Returns a new StringRegex with self extended by preceding as the positive lookbehind assertion.

from human_regex import StringRegex as Sre

sre = Sre("chat").preceded_by("chit")
assert sre == "(?<=chit)chat"

def not_preceded_by(self, not_preceding: str | bytes) -> Self: View Source

641    def not_preceded_by(self, not_preceding: Text_Element) -> Self:
642        """
643        @public
644        Returns a new `StringRegex` with *self* extended by *not_preceding* as the *negative lookbehind assertion*.
645
646        ```py
647        from human_regex import StringRegex as Sre
648
649        sre = Sre("chat").not_preceded_by("chit")
650        assert sre == "(?<!chit)chat"
651        ```
652        """
653        cls = type(self)
654        does_not_precede = cls.concatenate((cls.NOT_PRECEDED_BY, not_preceding)).extension
655        return cls.concatenate((does_not_precede, self))

Returns a new StringRegex with self extended by not_preceding as the negative lookbehind assertion.

from human_regex import StringRegex as Sre

sre = Sre("chat").not_preceded_by("chit")
assert sre == "(?<!chit)chat"

@classmethod

657    @classmethod
658    def yes_no(cls, id_name: int | Text_Element, yes: Text_Element, no: Text_Element | None = None) -> Self:
659        """
660        @public
661        Constructs the *yes-no-pattern* which will match with *yes*-pattern
662        if the group with given *id_name* exists, and with *no*-pattern if it doesn't.
663        *no*-pattern is optional and can be omitted. *id_name* can be the number
664        of the group or the name of the group if the group was named.
665
666        Example: Recreating the expression `(<)?(\\w+@\\w+(?:\\.\\w+)+)(?(1)>|$)`
667        from the built-in documentation of [re](https://docs.python.org/library/re.html)
668        for a poor email matching pattern, which will match with `<user@host.com>` as well as
669        `user@host.com`, but not with `<user@host.com` nor `user@host.com>`:
670
671        ```py
672        from human_regex import StringRegex as Sre
673
674        word = Sre(r"\\w").one_or_more # \\w+
675        mail_core = (
676            word +
677            "@" +
678            word +
679            word.prepend(r"\\.").no_capture.one_or_more
680        ).unnamed # (\\w+@\\w+(?:\\.\\w+)+)
681        maybe_less_than = Sre("<").unnamed.optional # (<)?
682        maybe_greater_than = Sre.yes_no(1, ">", "$") # (?(1)>|$)
683        mail_re = maybe_less_than + mail_core + maybe_greater_than
684
685        assert mail_re == r"(<)?(\\w+@\\w+(?:\\.\\w+)+)(?(1)>|$)"
686        ```
687        """
688        id_name = cls._convert_to_bytes_or_string(id_name) if isinstance(id_name, int) else id_name
689        result = cls.concatenate((cls(id_name).unnamed, yes))
690        if no is not None:
691            result += cls.concatenate((cls.OR, no))
692        return result.extension

Constructs the yes-no-pattern which will match with yes-pattern if the group with given id_name exists, and with no-pattern if it doesn't. no-pattern is optional and can be omitted. id_name can be the number of the group or the name of the group if the group was named.

Example: Recreating the expression (<)?(\w+@\w+(?:\.\w+)+)(?(1)>|$) from the built-in documentation of re for a poor email matching pattern, which will match with <user@host.com> as well as user@host.com, but not with <user@host.com nor user@host.com>:

from human_regex import StringRegex as Sre

word = Sre(r"\w").one_or_more # \w+
mail_core = (
    word +
    "@" +
    word +
    word.prepend(r"\.").no_capture.one_or_more
).unnamed # (\w+@\w+(?:\.\w+)+)
maybe_less_than = Sre("<").unnamed.optional # (<)?
maybe_greater_than = Sre.yes_no(1, ">", "$") # (?(1)>|$)
mail_re = maybe_less_than + mail_core + maybe_greater_than

assert mail_re == r"(<)?(\w+@\w+(?:\.\w+)+)(?(1)>|$)"

set: Self View Source

701    @property
702    def set(self) -> Self:
703        """
704        @public
705        Returns a new `StringRegex` for a set of *self*.
706
707        ```py
708        from human_regex import StringRegex as Sre
709
710        hex_digits = Sre("a-f0-9").set
711        assert hex_digits == "[a-f0-9]"
712        ```
713        """
714        cls = type(self)
715        return cls.concatenate((cls.OPEN_CHAR_SET, self, cls.CLOSE_CHAR_SET))

Returns a new StringRegex for a set of self.

from human_regex import StringRegex as Sre

hex_digits = Sre("a-f0-9").set
assert hex_digits == "[a-f0-9]"

optional: Self View Source

717    @property
718    def optional(self) -> Self:
719        """
720        @public
721        Returns a new `StringRegex` with `?` appended to *self*.
722
723        ```py
724        from human_regex import StringRegex as Sre
725
726        character = Sre(r"\\w")
727        optional_character = character.optional
728        assert optional_character == r"\\w?"
729        ```
730        """
731        cls = type(self)
732        return cls.concatenate((self, cls.OPTIONAL))

Returns a new StringRegex with ? appended to self.

from human_regex import StringRegex as Sre

character = Sre(r"\w")
optional_character = character.optional
assert optional_character == r"\w?"

zero_or_more: Self View Source

734    @property
735    def zero_or_more(self) -> Self:
736        """
737        @public
738        Returns a new `StringRegex` with `*` appended to *self*.
739
740        ```py
741        from human_regex import StringRegex as Sre
742
743        digit = Sre(r"\\d")
744        maybe_digits = digit.zero_or_more
745        assert maybe_digits == r"\\d*"
746        ```
747        """
748        cls = type(self)
749        return cls.concatenate((self, cls.ZERO_OR_MORE))

Returns a new StringRegex with * appended to self.

from human_regex import StringRegex as Sre

digit = Sre(r"\d")
maybe_digits = digit.zero_or_more
assert maybe_digits == r"\d*"

one_or_more: Self View Source

751    @property
752    def one_or_more(self) -> Self:
753        """
754        @public
755        Returns a new `StringRegex` with `+` appended to *self*.
756
757        ```py
758        from human_regex import StringRegex as Sre
759
760        digit = Sre(r"\\d")
761        some_digits = digit.one_or_more
762        assert some_digits == r"\\d+"
763        ```
764        """
765        cls = type(self)
766        return cls.concatenate((self, cls.ONE_OR_MORE))

Returns a new StringRegex with + appended to self.

from human_regex import StringRegex as Sre

digit = Sre(r"\d")
some_digits = digit.one_or_more
assert some_digits == r"\d+"

lazy: Self View Source

768    @property
769    def lazy(self) -> Self:
770        """
771        @public
772        Returns a new `StringRegex` with `?` appended to *self*.
773
774        ```py
775        from human_regex import StringRegex as Sre
776
777        everything = Sre(".*")
778        assert everything.lazy == ".*?"
779
780        tab = Sre(r"\\t")
781        everything_before_first_tab = everything.prepend("^").lazy.named("before_tab").append(tab)
782        assert everything_before_first_tab == r"(?P<before_tab>^.*?)\\t"
783        ```
784        """
785        cls = type(self)
786        return cls.concatenate((self, cls.LAZY))

Returns a new StringRegex with ? appended to self.

from human_regex import StringRegex as Sre

everything = Sre(".*")
assert everything.lazy == ".*?"

tab = Sre(r"\t")
everything_before_first_tab = everything.prepend("^").lazy.named("before_tab").append(tab)
assert everything_before_first_tab == r"(?P<before_tab>^.*?)\t"

def repeat(self, minimum, maximum, /) -> Self: View Source

788    def repeat(self, minimum, maximum, /) -> Self:
789        """
790        @public
791        Returns a new `StringRegex` with a greedy quantifier appended to *self*.
792        *minimum* and *maximum* specify limits of repetition. *maximum* is optional
793
794        ```py
795        from human_regex import StringRegex as Sre
796
797        two_or_more_As = Sre("A").repeat(2, None)
798        assert two_or_more_As == "A{2,}"
799        two_to_four_As = Sre("A").repeat(2, 4)
800        assert two_to_four_As == "A{2,4}"
801        up_to_four_As = Sre("A").repeat(None, 4)
802        assert up_to_four_As == "A{,4}"
803        ```
804        """
805        cls = type(self)
806        minimum = cls._convert_to_bytes_or_string(minimum) if minimum is not None else cls.EMPTY
807        maximum = cls._convert_to_bytes_or_string(maximum) if maximum is not None else cls.EMPTY
808        return cls.concatenate(
809            (self, cls.OPEN_QUANTIFIER, minimum, cls.QUANTIFIER_SEPARATOR, maximum, cls.CLOSE_QUANTIFIER)
810        )

Returns a new StringRegex with a greedy quantifier appended to self. minimum and maximum specify the limits of repetition; pass None for either of them to leave that bound open.

from human_regex import StringRegex as Sre

two_or_more_As = Sre("A").repeat(2, None)
assert two_or_more_As == "A{2,}"
two_to_four_As = Sre("A").repeat(2, 4)
assert two_to_four_As == "A{2,4}"
up_to_four_As = Sre("A").repeat(None, 4)
assert up_to_four_As == "A{,4}"

def exactly(self, number: int) -> Self: View Source

812    def exactly(self, number: int) -> Self:
813        """
814        @public
815        Returns a new `StringRegex` with a fixed quantifier of *number* appended to *self*.
816
817        ```py
818        from human_regex import StringRegex as Sre
819
820        three_As = Sre("A").exactly(3)
821        assert three_As == "A{3}"
822        ```
823        """
824        cls = type(self)
825        number = cls._convert_to_bytes_or_string(number) if number else cls.EMPTY
826        return cls.concatenate((self, cls.OPEN_QUANTIFIER, number, cls.CLOSE_QUANTIFIER))

Returns a new StringRegex with a fixed quantifier of number appended to self.

from human_regex import StringRegex as Sre

three_As = Sre("A").exactly(3)
assert three_As == "A{3}"