human_regex.bases.general_regex
All examples here are written for the `human_regex.StringRegex` variant, but they are equally applicable to the `human_regex.BytesRegex` variant if you use byte-strings (`b"..."`) instead of strings (`"..."`).
"""
All examples here are written for the `human_regex.StringRegex` variant, but
they are equally applicable for the `human_regex.BytesRegex` variant, if you
use byte-strings (`b"..."`) instead of strings (`"..."`).
"""

from abc import abstractmethod
from collections.abc import Iterable
from typing import Self

from ..contracts.abstract_regex import AbstractRegex

Text_Element = str | bytes
"""
@private
"""


class GeneralRegexBase(AbstractRegex):
    """
    Base class for `human_regex.StringRegex` and `human_regex.BytesRegex`.
    """

    # The abstract class-level properties below declare the regex syntax
    # tokens that every method in this class reads through ``cls.<NAME>``.
    #
    # NOTE(review): stacking ``@classmethod`` on ``@property`` was deprecated
    # in Python 3.11 and removed in Python 3.13. Presumably the concrete
    # subclasses override these names with plain class attributes, in which
    # case only the declarations here are affected -- confirm.

    @classmethod
    @property
    @abstractmethod
    def EMPTY(cls) -> Text_Element:
        """
        @private
        """
        ...

    @classmethod
    @property
    @abstractmethod
    def OPEN_CHAR_SET(cls) -> Text_Element:
        """
        @private
        """
        ...

    @classmethod
    @property
    @abstractmethod
    def CLOSE_CHAR_SET(cls) -> Text_Element:
        """
        @private
        """
        ...

    @classmethod
    @property
    @abstractmethod
    def OPEN_GROUP(cls) -> Text_Element:
        """
        @private
        """
        ...

    @classmethod
    @property
    @abstractmethod
    def CLOSE_GROUP(cls) -> Text_Element:
        """
        @private
        """
        ...

    @classmethod
    @property
    @abstractmethod
    def OPEN_EXTENSION(cls) -> Text_Element:
        """
        @private
        """
        ...

    @classmethod
    @property
    @abstractmethod
    def CLOSE_EXTENSION(cls) -> Text_Element:
        """
        @private
        """
        ...

    @classmethod
    @property
    @abstractmethod
    def OPEN_NAME(cls) -> Text_Element:
        """
        @private
        """
        ...

    @classmethod
    @property
    @abstractmethod
    def CLOSE_NAME(cls) -> Text_Element:
        """
        @private
        """
        ...

    @classmethod
    @property
    @abstractmethod
    def OPEN_QUANTIFIER(cls) -> Text_Element:
        """
        @private
        """
        ...

    @classmethod
    @property
    @abstractmethod
    def CLOSE_QUANTIFIER(cls) -> Text_Element:
        """
        @private
        """
        ...

    @classmethod
    @property
    @abstractmethod
    def QUANTIFIER_SEPARATOR(cls) -> Text_Element:
        """
        @private
        """
        ...

    @classmethod
    @property
    @abstractmethod
    def OR(cls) -> Text_Element:
        """
        @private
        """
        ...

    @classmethod
    @property
    @abstractmethod
    def NO_CAPTURE(cls) -> Text_Element:
        """
        @private
        """
        ...

    @classmethod
    @property
    @abstractmethod
    def FLAGS_END(cls) -> Text_Element:
        """
        @private
        """
        ...

    @classmethod
    @property
    @abstractmethod
    def ATOMIC(cls) -> Text_Element:
        """
        @private
        """
        ...

    @classmethod
    @property
    @abstractmethod
    def NAME_REFERENCE(cls) -> Text_Element:
        """
        @private
        """
        ...

    @classmethod
    @property
    @abstractmethod
    def COMMENT(cls) -> Text_Element:
        """
        @private
        """
        ...

    @classmethod
    @property
    @abstractmethod
    def FOLLOWED_BY(cls) -> Text_Element:
        """
        @private
        """
        ...

    @classmethod
    @property
    @abstractmethod
    def NOT_FOLLOWED_BY(cls) -> Text_Element:
        """
        @private
        """
        ...

    @classmethod
    @property
    @abstractmethod
    def PRECEDED_BY(cls) -> Text_Element:
        """
        @private
        """
        ...

    @classmethod
    @property
    @abstractmethod
    def NOT_PRECEDED_BY(cls) -> Text_Element:
        """
        @private
        """
        ...

    @classmethod
    @property
    @abstractmethod
    def ZERO_OR_MORE(cls) -> Text_Element:
        """
        @private
        """
        ...

    @classmethod
    @property
    @abstractmethod
    def ONE_OR_MORE(cls) -> Text_Element:
        """
        @private
        """
        ...

    @classmethod
    @property
    @abstractmethod
    def OPTIONAL(cls) -> Text_Element:
        """
        @private
        """
        ...

    @classmethod
    @property
    @abstractmethod
    def LAZY(cls) -> Text_Element:
        """
        @private
        """
        ...

    @classmethod
    def concatenate(cls, elements: Iterable[Text_Element]) -> Self:
        """
        Concatenates items in *elements*. Returns `StringRegex` instance of the joined string.

        ```py
        from human_regex import StringRegex as Sre

        sre = Sre.concatenate(("Hello", " ", "world"))
        assert sre == Sre("Hello world")
        # equivalent to the following:
        sre = Sre("".join(("Hello", " ", "world")))
        sre = Sre("").join(("Hello", " ", "world"))
        sre = Sre("Hello") + " " + "world"
        ```
        """
        # Join with the plain built-in type (str or bytes) rather than with
        # an instance of cls, then wrap the result back into cls.
        str_or_bytes = str if str in cls.__mro__ else bytes
        result = str_or_bytes(cls.EMPTY).join(elements)
        return cls(result)

    def __add__(self, other: Text_Element) -> Self:
        """
        @public
        Support of the `+` operator. Returns a new `StringRegex` instance which has *other*
        appended to the original `StringRegex` instance.

        ```py
        from human_regex import StringRegex as Sre

        sre1 = Sre("abc")
        sre2 = Sre("def")
        sre = sre1 + sre2
        assert sre == Sre("abcdef")

        # identical to:
        sre = Sre("abc") + "def"
        ```
        """
        cls = type(self)
        return cls.concatenate((self, other))

    def __or__(self, other: Text_Element) -> Self:
        """
        @public
        Support of the `|` operator. Returns a new `StringRegex` instance which is
        the original instance joined with *other* using the `|` symbol.

        ```py
        from human_regex import StringRegex as Sre

        sre1 = Sre("abc")
        sre2 = Sre("def")
        sre = sre1 | sre2
        assert sre == Sre("abc|def")

        # identical to:
        sre = Sre("abc") | "def"
        ```
        """
        cls = type(self)
        return cls.concatenate((self, cls.OR, other))

    def append(self, appendent: Text_Element) -> Self:
        """
        @public
        Returns a new `StringRegex` instance which is the original instance
        followed by *appendent*.

        ```py
        from human_regex import StringRegex as Sre

        sre = Sre("pretty").append(" little").append(Sre(" angel"))
        assert sre == Sre("pretty little angel")
        ```
        """
        cls = type(self)
        return cls.concatenate((self, appendent))

    def prepend(self, prependent: Text_Element) -> Self:
        """
        @public
        Returns a new `StringRegex` instance with *self*
        following the *prependent*.

        This is useful when an already defined `StringRegex` instance is used
        as a building block of a more complex `StringRegex` and we need
        to add some `str` instances before it.

        ```py
        from human_regex import StringRegex as Sre

        party = Sre("Party")
        # Intention: to add the strings "A Long-" and "Expected "
        # before the `party` instance.
        #
        # The built-in str does not have an .append method, so this will fail:
        # "A Long-".append("Expected ").append(party).
        #
        # If we use string addition, we would get a str instance
        # rather than a StringRegex instance:
        # s: str = "A Long-" + "Expected " + party
        #
        # To get a StringRegex instance, we can turn things around
        # and instead prepend normal str instances to a StringRegex:
        sre: Sre = party.prepend("Expected ").prepend("A Long-")
        assert sre == Sre("A Long-Expected Party")

        # Alternative: convert the starting element to StringRegex and add the rest:
        sre: Sre = Sre("A Long-") + "Expected " + party
        # Or concatenate:
        sre: Sre = Sre.concatenate(("A Long-", "Expected ", party))
        ```
        """
        cls = type(self)
        return cls.concatenate((prependent, self))

    def join(self, elements: Iterable[Text_Element]) -> Self:
        # ruff: noqa: RUF002, E501
        """
        @public
        Analogous to `str.join`, but produces instances of `StringRegex`:

        ```py
        from human_regex import StringRegex as Sre

        sre = Sre(" ").join(("hello", "world"))
        assert sre == Sre("hello world")
        ```

        A more complex example: A suboptimal regular expression for
        parsing geographical longitude in the form *127° 36′ 52″ W*:

        ```py
        from human_regex import StringRegex as Sre

        coordinates = Sre(" ").join((
            Sre(r"\\d").repeat(1, 3).named("degrees").append("°"),
            Sre(r"\\d").repeat(1, 2).named("minutes").append("′"),
            Sre(r"\\d").repeat(1, 2).named("seconds").append("″"),
            Sre("EW").set.named("direction"),
        )).named("coordinates")
        assert coordinates == r"(?P<coordinates>(?P<degrees>\\d{1,3})° (?P<minutes>\\d{1,2})′ (?P<seconds>\\d{1,2})″ (?P<direction>[EW]))"
        ```
        (The resulting regular expression is suboptimal because it would capture values of degrees, minutes, seconds which
        are illegal or not part of the longitude notation norm, e.g. `190° 78′ 93″ E` would be a match.)
        """
        cls = type(self)
        # Use the built-in join of the underlying type with *self* as the
        # separator, then wrap the result back into cls.
        str_or_bytes = str if str in cls.__mro__ else bytes
        result = str_or_bytes(self).join(elements)
        return cls(result)

    @property
    def unnamed(self) -> Self:
        """
        @public
        Creates an unnamed group with the contents of *self*.

        ```py
        from human_regex import StringRegex as Sre

        number = Sre(r"\\d").one_or_more.unnamed
        assert number == "(\\d+)"
        sre = number.prepend("My favorite number is ").append(r"\\.")
        assert sre == "My favorite number is (\\d+)\\."
        ```
        """
        cls = type(self)
        return cls.concatenate((cls.OPEN_GROUP, self, cls.CLOSE_GROUP))

    @property
    def extension(self) -> Self:
        """
        @public
        Returns a new `StringRegex` instance with *self* inside the extension notation `(?...)`:

        ```py
        from human_regex import StringRegex as Sre

        sre: Sre = Sre("something").extension
        assert sre == "(?something)"
        ```
        """
        cls = type(self)
        return cls.concatenate((cls.OPEN_EXTENSION, self, cls.CLOSE_EXTENSION))

    @classmethod
    def set_flags(cls, flags: Text_Element) -> Self:
        """
        @public
        A way of encoding regular expression flags into the expression string itself,
        rather than passing it as *flag* argument to the `StringRegex.compile` or other methods.
        This should only be used at the start of a regular expression.

        Returns a `StringRegex` instance which sets the given *flags*. Flags can be one
        or more letters from the set `a`, `i`, `L`, `m`, `s`, `u`, `x`.
        Refer to the documentation of [re](https://docs.python.org/library/re.html),
        search there for "aiLmsux".

        ```py
        from human_regex import StringRegex as Sre

        # Create an expression with Sre.MULTILINE | Sre.IGNORECASE
        sre = Sre.set_flags("mi").append("match.this")
        assert sre == "(?mi)match.this"
        ```
        """
        return cls(flags).extension

    @property
    def no_capture(self) -> Self:
        """
        @public
        Returns a new `StringRegex` with a non-capturing group made of *self*:

        ```py
        from human_regex import StringRegex as Sre

        sre = Sre("match").no_capture
        assert sre == "(?:match)"
        ```
        """
        cls = type(self)
        return cls.concatenate((cls.NO_CAPTURE, self)).extension

    def modify_flags(self, flags: Text_Element) -> Self:
        """
        @public
        Allows you to set different flags for a part of a more complex expression.

        Returns a new `StringRegex` instance with *self* inside the
        modify-flags-extension with flags modified as specified by the
        *flags* argument. Flags can be one
        or more letters from the set `a`, `i`, `L`, `m`, `s`, `u`, `x`, optionally
        followed by "-" followed by one or more letters from the `i`, `m`, `s`, `x` set.


        Refer to the documentation of [re](https://docs.python.org/library/re.html),
        search there for "aiLmsux-imsx".

        ```py
        from human_regex import StringRegex as Sre
        part1 = Sre.set_flags("mi").append("multiline.and.ignore.case.here")
        assert part1 == "(?mi)multiline.and.ignore.case.here"
        part2 = Sre("add.dot.all.but.do.NOT.ignore.case.HERE.and.no.multiline").modify_flags("s-im")
        assert part2 == "(?s-im:add.dot.all.but.do.NOT.ignore.case.HERE.and.no.multiline)"
        part3 = "again.multiline.and.ignore.case.here"
        sre = Sre.concatenate((part1, part2, part3))
        assert sre == "(?mi)multiline.and.ignore.case.here(?s-im:add.dot.all.but.do.NOT.ignore.case.HERE.and.no.multiline)again.multiline.and.ignore.case.here"
        ```
        """
        cls = type(self)
        extension_core = cls.concatenate((flags, cls.FLAGS_END, self))
        return extension_core.extension

    @property
    def atomic(self) -> Self:
        """
        @public
        Returns a new `StringRegex` instance with *self* as the content of an *atomic group*.

        ```py
        from human_regex import StringRegex as Sre

        sre = Sre("content").atomic
        assert sre == "(?>content)"
        ```
        """
        cls = type(self)
        return cls.concatenate((cls.ATOMIC, self)).extension

    def named(self, name: Text_Element) -> Self:
        """
        @public
        Returns a new `StringRegex` instance with *self* as the content of a group named *name*.

        ```py
        from human_regex import StringRegex as Sre

        word = Sre(r"\\w").one_or_more  # \\w+
        burger = word.named("burger")
        assert burger == r"(?P<burger>\\w+)"
        extra = word.named("extra")
        assert extra == r"(?P<extra>\\w+)"

        sre = burger + " with " + extra
        assert sre == r"(?P<burger>\\w+) with (?P<extra>\\w+)"

        match = sre.match("quarterpounder with cheese")
        assert match.group("burger") == "quarterpounder"
        assert match.group("extra") == "cheese"
        ```
        """
        cls = type(self)
        label = cls.concatenate((cls.OPEN_NAME, name, cls.CLOSE_NAME))
        return cls.concatenate((label, self)).extension

    @property
    def backreference(self) -> Self:
        """
        @public
        Returns `StringRegex` instance with *self* as the name of the group being referred back to.

        ```py
        from human_regex import StringRegex as Sre

        word = Sre(r"\\w").one_or_more  # \\w+
        old_ruler = word.named("ruler")  # (?P<ruler>\\w+)
        new_ruler = Sre("ruler").backreference  # (?P=ruler)
        sre = Sre(" ").join(("The", old_ruler, "is dead, long live the", new_ruler.append("!")))

        assert sre == "The (?P<ruler>\\w+) is dead, long live the (?P=ruler)!"
        text = "The king is dead, long live the king!"
        assert sre.match(text)
        ```
        """
        cls = type(self)
        return cls.concatenate((cls.NAME_REFERENCE, self)).extension

    @property
    def comment(self) -> Self:
        """
        @public
        Returns a new `StringRegex` with *self* as a comment.

        ```py
        from human_regex import StringRegex as Sre

        sre = Sre(r"0-9a-f").set + Sre("any hex digit").comment
        assert sre == "[0-9a-f](?#any hex digit)"
        ```
        """
        cls = type(self)
        return cls.concatenate((cls.COMMENT, self)).extension

    def followed_by(self, following: Text_Element) -> Self:
        """
        @public
        Returns a new `StringRegex` with *self* extended by *following* as the *positive lookahead assertion*.

        ```py
        from human_regex import StringRegex as Sre

        sre = Sre("Isaac ").followed_by("Asimov")
        assert sre == "Isaac (?=Asimov)"
        ```
        """
        cls = type(self)
        follows = cls.concatenate((cls.FOLLOWED_BY, following)).extension
        return cls.concatenate((self, follows))

    def not_followed_by(self, not_following: Text_Element) -> Self:
        """
        @public
        Returns a new `StringRegex` with *self* extended by *not_following* as the *negative lookahead assertion*.

        ```py
        from human_regex import StringRegex as Sre

        sre = Sre("Isaac ").not_followed_by("Asimov")
        assert sre == "Isaac (?!Asimov)"
        ```
        """
        cls = type(self)
        does_not_follow = cls.concatenate((cls.NOT_FOLLOWED_BY, not_following)).extension
        return cls.concatenate((self, does_not_follow))

    def preceded_by(self, preceding: Text_Element) -> Self:
        """
        @public
        Returns a new `StringRegex` with *self* extended by *preceding* as the *positive lookbehind assertion*.

        ```py
        from human_regex import StringRegex as Sre

        sre = Sre("chat").preceded_by("chit")
        assert sre == "(?<=chit)chat"
        ```
        """
        cls = type(self)
        precedes = cls.concatenate((cls.PRECEDED_BY, preceding)).extension
        return cls.concatenate((precedes, self))

    def not_preceded_by(self, not_preceding: Text_Element) -> Self:
        """
        @public
        Returns a new `StringRegex` with *self* extended by *not_preceding* as the *negative lookbehind assertion*.

        ```py
        from human_regex import StringRegex as Sre

        sre = Sre("chat").not_preceded_by("chit")
        assert sre == "(?<!chit)chat"
        ```
        """
        cls = type(self)
        does_not_precede = cls.concatenate((cls.NOT_PRECEDED_BY, not_preceding)).extension
        return cls.concatenate((does_not_precede, self))

    @classmethod
    def yes_no(
        cls,
        id_name: int | Text_Element,
        yes: Text_Element,
        no: Text_Element | None = None,
    ) -> Self:
        """
        @public
        Constructs the *yes-no-pattern* which will match with *yes*-pattern
        if the group with given *id_name* exists, and with *no*-pattern if it doesn't.
        *no*-pattern is optional and can be omitted. *id_name* can be the number
        of the group or the name of the group if the group was named.

        Example: Recreating the expression `(<)?(\\w+@\\w+(?:\\.\\w+)+)(?(1)>|$)`
        from the built-in documentation of [re](https://docs.python.org/library/re.html)
        for a poor email matching pattern, which will match with `<user@host.com>` as well as
        `user@host.com`, but not with `<user@host.com` nor `user@host.com>`:

        ```py
        from human_regex import StringRegex as Sre

        word = Sre(r"\\w").one_or_more  # \\w+
        mail_core = (
            word +
            "@" +
            word +
            word.prepend(r"\\.").no_capture.one_or_more
        ).unnamed  # (\\w+@\\w+(?:\\.\\w+)+)
        maybe_less_than = Sre("<").unnamed.optional  # (<)?
        maybe_greater_than = Sre.yes_no(1, ">", "$")  # (?(1)>|$)
        mail_re = maybe_less_than + mail_core + maybe_greater_than

        assert mail_re == r"(<)?(\\w+@\\w+(?:\\.\\w+)+)(?(1)>|$)"
        ```
        """
        # A numeric group id has to be rendered as text of the matching kind
        # (str or bytes) before it can be concatenated.
        id_name = cls._convert_to_bytes_or_string(id_name) if isinstance(id_name, int) else id_name
        result = cls.concatenate((cls(id_name).unnamed, yes))
        if no is not None:
            result += cls.concatenate((cls.OR, no))
        return result.extension

    @classmethod
    def _convert_to_bytes_or_string(cls, i: int) -> Text_Element:
        """
        Renders the integer *i* in decimal, as `bytes` when `bytes` is among
        the bases of *cls* and as `str` otherwise.
        """
        text = str(i)
        if bytes in cls.__mro__:
            return text.encode()
        return text

    @property
    def set(self) -> Self:
        """
        @public
        Returns a new `StringRegex` for a set of *self*.

        ```py
        from human_regex import StringRegex as Sre

        hex_digits = Sre("a-f0-9").set
        assert hex_digits == "[a-f0-9]"
        ```
        """
        cls = type(self)
        return cls.concatenate((cls.OPEN_CHAR_SET, self, cls.CLOSE_CHAR_SET))

    @property
    def optional(self) -> Self:
        """
        @public
        Returns a new `StringRegex` with `?` appended to *self*.

        ```py
        from human_regex import StringRegex as Sre

        character = Sre(r"\\w")
        optional_character = character.optional
        assert optional_character == r"\\w?"
        ```
        """
        cls = type(self)
        return cls.concatenate((self, cls.OPTIONAL))

    @property
    def zero_or_more(self) -> Self:
        """
        @public
        Returns a new `StringRegex` with `*` appended to *self*.

        ```py
        from human_regex import StringRegex as Sre

        digit = Sre(r"\\d")
        maybe_digits = digit.zero_or_more
        assert maybe_digits == r"\\d*"
        ```
        """
        cls = type(self)
        return cls.concatenate((self, cls.ZERO_OR_MORE))

    @property
    def one_or_more(self) -> Self:
        """
        @public
        Returns a new `StringRegex` with `+` appended to *self*.

        ```py
        from human_regex import StringRegex as Sre

        digit = Sre(r"\\d")
        some_digits = digit.one_or_more
        assert some_digits == r"\\d+"
        ```
        """
        cls = type(self)
        return cls.concatenate((self, cls.ONE_OR_MORE))

    @property
    def lazy(self) -> Self:
        """
        @public
        Returns a new `StringRegex` with `?` appended to *self*.

        ```py
        from human_regex import StringRegex as Sre

        everything = Sre(".*")
        assert everything.lazy == ".*?"

        tab = Sre(r"\\t")
        everything_before_first_tab = everything.prepend("^").lazy.named("before_tab").append(tab)
        assert everything_before_first_tab == r"(?P<before_tab>^.*?)\\t"
        ```
        """
        cls = type(self)
        return cls.concatenate((self, cls.LAZY))

    def repeat(self, minimum: int | None, maximum: int | None = None, /) -> Self:
        """
        @public
        Returns a new `StringRegex` with a greedy quantifier appended to *self*.
        *minimum* and *maximum* specify limits of repetition. *maximum* is optional;
        either limit can be passed as `None` to leave that side open.

        ```py
        from human_regex import StringRegex as Sre

        two_or_more_As = Sre("A").repeat(2, None)
        assert two_or_more_As == "A{2,}"
        assert Sre("A").repeat(2) == "A{2,}"
        two_to_four_As = Sre("A").repeat(2, 4)
        assert two_to_four_As == "A{2,4}"
        up_to_four_As = Sre("A").repeat(None, 4)
        assert up_to_four_As == "A{,4}"
        ```
        """
        cls = type(self)
        # Compare against None explicitly so that a limit of 0 is kept.
        minimum = cls._convert_to_bytes_or_string(minimum) if minimum is not None else cls.EMPTY
        maximum = cls._convert_to_bytes_or_string(maximum) if maximum is not None else cls.EMPTY
        return cls.concatenate(
            (self, cls.OPEN_QUANTIFIER, minimum, cls.QUANTIFIER_SEPARATOR, maximum, cls.CLOSE_QUANTIFIER)
        )

    def exactly(self, number: int) -> Self:
        """
        @public
        Returns a new `StringRegex` with a fixed quantifier of *number* appended to *self*.

        ```py
        from human_regex import StringRegex as Sre

        three_As = Sre("A").exactly(3)
        assert three_As == "A{3}"
        no_As = Sre("A").exactly(0)
        assert no_As == "A{0}"
        ```
        """
        cls = type(self)
        # Compare against None explicitly: a plain truthiness test would treat
        # 0 as "missing" and emit an empty quantifier instead of the count 0.
        number = cls._convert_to_bytes_or_string(number) if number is not None else cls.EMPTY
        return cls.concatenate((self, cls.OPEN_QUANTIFIER, number, cls.CLOSE_QUANTIFIER))
20class GeneralRegexBase(AbstractRegex): 21 """ 22 Base class for `human_regex.StringRegex` and `human_regex.BytesRegex`. 23 """ 24 25 @classmethod 26 @property 27 @abstractmethod 28 def EMPTY(cls) -> Text_Element: 29 """ 30 @private 31 """ 32 ... 33 34 @classmethod 35 @property 36 @abstractmethod 37 def OPEN_CHAR_SET(cls) -> Text_Element: 38 """ 39 @private 40 """ 41 ... 42 43 @classmethod 44 @property 45 @abstractmethod 46 def CLOSE_CHAR_SET(cls) -> Text_Element: 47 """ 48 @private 49 """ 50 ... 51 52 @classmethod 53 @property 54 @abstractmethod 55 def OPEN_GROUP(cls) -> Text_Element: 56 """ 57 @private 58 """ 59 ... 60 61 @classmethod 62 @property 63 @abstractmethod 64 def CLOSE_GROUP(cls) -> Text_Element: 65 """ 66 @private 67 """ 68 ... 69 70 @classmethod 71 @property 72 @abstractmethod 73 def OPEN_EXTENSION(cls) -> Text_Element: 74 """ 75 @private 76 """ 77 ... 78 79 @classmethod 80 @property 81 @abstractmethod 82 def CLOSE_EXTENSION(cls) -> Text_Element: 83 """ 84 @private 85 """ 86 ... 87 88 @classmethod 89 @property 90 @abstractmethod 91 def OPEN_NAME(cls) -> Text_Element: 92 """ 93 @private 94 """ 95 ... 96 97 @classmethod 98 @property 99 @abstractmethod 100 def CLOSE_NAME(cls) -> Text_Element: 101 """ 102 @private 103 """ 104 ... 105 106 @classmethod 107 @property 108 @abstractmethod 109 def OPEN_QUANTIFIER(cls) -> Text_Element: 110 """ 111 @private 112 """ 113 ... 114 115 @classmethod 116 @property 117 @abstractmethod 118 def CLOSE_QUANTIFIER(cls) -> Text_Element: 119 """ 120 @private 121 """ 122 ... 123 124 @classmethod 125 @property 126 @abstractmethod 127 def QUANTIFIER_SEPARATOR(cls) -> Text_Element: 128 """ 129 @private 130 """ 131 ... 132 133 @classmethod 134 @property 135 @abstractmethod 136 def OR(cls) -> Text_Element: 137 """ 138 @private 139 """ 140 ... 141 142 @classmethod 143 @property 144 @abstractmethod 145 def NO_CAPTURE(cls) -> Text_Element: 146 """ 147 @private 148 """ 149 ... 
150 151 @classmethod 152 @property 153 @abstractmethod 154 def FLAGS_END(cls) -> Text_Element: 155 """ 156 @private 157 """ 158 ... 159 160 @classmethod 161 @property 162 @abstractmethod 163 def ATOMIC(cls) -> Text_Element: 164 """ 165 @private 166 """ 167 ... 168 169 @classmethod 170 @property 171 @abstractmethod 172 def NAME_REFERENCE(cls) -> Text_Element: 173 """ 174 @private 175 """ 176 ... 177 178 @classmethod 179 @property 180 @abstractmethod 181 def COMMENT(cls) -> Text_Element: 182 """ 183 @private 184 """ 185 ... 186 187 @classmethod 188 @property 189 @abstractmethod 190 def FOLLOWED_BY(cls) -> Text_Element: 191 """ 192 @private 193 """ 194 ... 195 196 @classmethod 197 @property 198 @abstractmethod 199 def NOT_FOLLOWED_BY(cls) -> Text_Element: 200 """ 201 @private 202 """ 203 ... 204 205 @classmethod 206 @property 207 @abstractmethod 208 def PRECEDED_BY(cls) -> Text_Element: 209 """ 210 @private 211 """ 212 ... 213 214 @classmethod 215 @property 216 @abstractmethod 217 def NOT_PRECEDED_BY(cls) -> Text_Element: 218 """ 219 @private 220 """ 221 ... 222 223 @classmethod 224 @property 225 @abstractmethod 226 def ZERO_OR_MORE(cls) -> Text_Element: 227 """ 228 @private 229 """ 230 ... 231 232 @classmethod 233 @property 234 @abstractmethod 235 def ONE_OR_MORE(cls) -> Text_Element: 236 """ 237 @private 238 """ 239 ... 240 241 @classmethod 242 @property 243 @abstractmethod 244 def OPTIONAL(cls) -> Text_Element: 245 """ 246 @private 247 """ 248 ... 249 250 @classmethod 251 @property 252 @abstractmethod 253 def LAZY(cls) -> Text_Element: 254 """ 255 @private 256 """ 257 ... 258 259 @classmethod 260 def concatenate(cls, elements: Iterable[Text_Element]) -> Self: 261 """ 262 Concatenates items in *elements*. Returns `StringRegex` instance of the joined string. 
263 264 ```py 265 from human_regex import StringRegex as Sre 266 267 sre = Sre.concatenate(("Hello", " ", "world")) 268 assert sre == Sre("Hello world") 269 # equivalent to the following: 270 sre = Sre("".join(("Hello", " ", "world"))) 271 sre = Sre("").join(("Hello", " ", "world")) 272 sre = Sre("Hello") + " " + "world" 273 ``` 274 """ 275 str_or_bytes = str if str in cls.__mro__ else bytes 276 result = str_or_bytes(cls.EMPTY).join(elements) 277 return cls(result) 278 279 def __add__(self, other: Text_Element) -> Self: 280 """ 281 @public 282 Support of the `+` operator. Returns a new `StringRegex` instance which has *other* 283 appended to the original `StringRegex` instance. 284 285 ```py 286 from human_regex import StringRegex as Sre 287 288 sre1 = Sre("abc") 289 sre2 = Sre("def") 290 sre = sre1 + sre2 291 assert sre == Sre("abcdef") 292 293 # identical to: 294 sre = Sre("abc") + "def" 295 ``` 296 """ 297 cls = type(self) 298 return cls.concatenate((self, other)) 299 300 def __or__(self, other) -> Self: 301 """ 302 @public 303 Support of the `|` operator. Returns a new `StringRegex` instance which is 304 the original instance joined with *other* using the `|` symbol. 305 306 ```py 307 from human_regex import StringRegex as Sre 308 309 sre1 = Sre("abc") 310 sre2 = Sre("def") 311 sre = sre1 | sre2 312 assert sre == Sre("abc|def") 313 314 # identical to: 315 sre = Sre("abc") | "def" 316 ``` 317 """ 318 cls = type(self) 319 return cls.concatenate((self, cls.OR, other)) 320 321 def append(self, appendent: Text_Element) -> Self: 322 """ 323 @public 324 Returns a new `StringRegex` instance which is the original instance 325 followed by *appendent*. 
326 327 ```py 328 from human_regex import StringRegex as Sre 329 330 sre = Sre("pretty").append(" little").append(Sre(" angel")) 331 assert sre == Sre("pretty little angel") 332 ``` 333 """ 334 cls = type(self) 335 return cls.concatenate((self, appendent)) 336 337 def prepend(self, prependent: Text_Element) -> Self: 338 """ 339 @public 340 Returns a new `StringRegex` instance with *self* 341 following the *prependent*. 342 343 This is useful when an already defined `StringRegex` instance is used 344 as a building block of a more complex `StringRegex` and we need 345 to add some `str` instances before it. 346 347 ```py 348 from human_regex import StringRegex as Sre 349 350 party = Sre("Party") 351 # Intention: to add the strings "A Long-" and "Expected " 352 # before the `party` instance. 353 # 354 # The built-in str does not have an .append method, so this will fail: 355 # "A Long-".append("Expected" ").append(party). 356 # 357 # If we use string addition, we would get a str instance 358 # rather than a StringRegex instance: 359 # s: str = "A Long-" + "Expected " + party 360 # 361 # To get a StringRegex instance, we can turn things around 362 # and instead prepend normal str instances to a StringRegex: 363 sre: Sre = party.prepend("Expected ").prepend("A Long-") 364 assert sre == Sre("A Long-Expected Party") 365 366 # Alternative: convert the starting element to StringRegex and add the rest: 367 sre: Sre = Sre("A Long-") + "Expected" + party 368 # Or concatenate: 369 sre: Sre = Sre.concatenate(("A Long-", "Expected ", party)) 370 ``` 371 """ 372 cls = type(self) 373 return cls.concatenate((prependent, self)) 374 375 def join(self, elements: Iterable[Text_Element]) -> Self: 376 # ruff: noqa: RUF002, E501 377 """ 378 @public 379 Analogous to `str.join`, but produces instances of `StringRegex`: 380 381 ```py 382 from human_regex import StringRegex as Sre 383 384 sre = Sre(" ").join(("hello", "world")) 385 assert sre == Sre("hello world") 386 ``` 387 388 A more complex 
example: A suboptimal regular expression for 389 parsing geographical longitude in the form *127° 36′ 52″ W*: 390 391 ```py 392 from human_regex import StringRegex as Sre 393 394 coordinates = Sre(" ").join(( 395 Sre(r"\\d").repeat(1, 3).named("degrees").append("°"), 396 Sre(r"\\d").repeat(1, 2).named("minutes").append("′"), 397 Sre(r"\\d").repeat(1, 2).named("seconds").append("″"), 398 Sre("EW").set.named("direction"), 399 )).named("coordinates") 400 assert coordinates == r"(?P<coordinates>(?P<degrees>\\d{1,3})° (?P<minutes>\\d{1,2})′ (?P<seconds>\\d{1,2})″ (?P<direction>[EW]))" 401 ``` 402 (The resulting regular expression is suboptimal because it would capture values of degrees, minutes, seconds which 403 are illegal or not part of the longitude notation norm, e.g. `190° 78′ 93″ E` would be a match.) 404 """ 405 cls = type(self) 406 str_or_bytes = str if str in cls.__mro__ else bytes 407 result = str_or_bytes(self).join(elements) 408 return cls(result) 409 410 @property 411 def unnamed(self) -> Self: 412 """ 413 @public 414 Creates an unnamed group with the contents of *self*. 415 416 ```py 417 from human_regex import StringRegex as Sre 418 419 number = Sre(r"\\d").one_or_more.unnamed 420 assert number == "(\\d+)" 421 sre = number.prepend("My favorite number is ").append(r"\\.") 422 assert sre == "My favorite number is (\\d+)\\." 
423 ``` 424 """ 425 cls = type(self) 426 return cls.concatenate((cls.OPEN_GROUP, self, cls.CLOSE_GROUP)) 427 428 @property 429 def extension(self) -> Self: 430 """ 431 @public 432 Returns a new `StringRegex` instance with *self* inside the extension notation `(?...)`: 433 434 ```py 435 from human_regex import StringRegex as Sre 436 437 sre: Sre = Sre("something").extension 438 assert sre == "(?something)" 439 ``` 440 """ 441 cls = type(self) 442 return cls.concatenate((cls.OPEN_EXTENSION, self, cls.CLOSE_EXTENSION)) 443 444 @classmethod 445 def set_flags(cls, flags: Text_Element) -> Self: 446 """ 447 @public 448 A way of encoding regular expression flags into the expression string itself, 449 rather than passing it as *flag* argument to the `StringRegex.compile` or other methods. 450 This should only be used at the start of a regular expression. 451 452 Returns a `StringRegex` instance which sets the given *flags*. Flags can be one 453 or more letters from the set `a`, `i`, `L`, `m`, `s`, `u`, `x`. 454 Refer to the documentation of [re](https://docs.python.org/library/re.html), 455 search there for "aiLmsux". 456 457 ```py 458 from human_regex import StringRegex as Sre 459 460 # Create an expression with Sre.MULTILINE | Sre.IGNORECASE 461 sre = Sre.set_flags("mi").append("match.this") 462 assert sre == "(?mi)match.this" 463 ``` 464 """ 465 return cls(flags).extension 466 467 @property 468 def no_capture(self) -> Self: 469 """ 470 @public 471 Returns a new `StringRegex` with a non-capturing group made of *self*: 472 473 ```py 474 from human_regex import StringRegex as Sre 475 476 sre = Sre("match").no_capture 477 assert sre == "(?:match)" 478 ``` 479 """ 480 cls = type(self) 481 return cls.concatenate((cls.NO_CAPTURE, self)).extension 482 483 def modify_flags(self, flags: Text_Element) -> Self: 484 """ 485 @public 486 Allows you to set different flags for a part of a more complex expression. 
487 488 Returns a new `StringRegex` instance with *self* inside the 489 modify-flags-extension with flags modified as specified by the 490 *flags* argument. Flags can be one 491 or more letters from the set `a`, `i`, `L`, `m`, `s`, `u`, `x`, optionally 492 followed by "-" followed by one or more letters from the `i`, `m`, `s`, `x` set. 493 494 495 Refer to the documentation of [re](https://docs.python.org/library/re.html), 496 search there for "aiLmsux-imsx". 497 498 ```py 499 from human_regex import StringRegex as Sre 500 part1 = Sre.set_flags("mi").append("multiline.and.ignore.case.here") 501 assert part1 == "(?mi)multiline.and.ignore.case.here" 502 part2 = Sre("add.dot.all.but.do.NOT.ignore.case.HERE.and.no.multiline").modify_flags("s-im") 503 assert part2 == "(?s-im:add.dot.all.but.do.NOT.ignore.case.HERE.and.no.multiline)" 504 part3 = "again.multiline.and.ignore.case.here" 505 sre = Sre.concatenate((part1, part2, part3)) 506 assert sre == "(?mi)multiline.and.ignore.case.here(?s-im:add.dot.all.but.do.NOT.ignore.case.HERE.and.no.multiline)again.multiline.and.ignore.case.here" 507 ``` 508 """ 509 cls = type(self) 510 extension_core = cls.concatenate((flags, cls.FLAGS_END, self)) 511 return extension_core.extension 512 513 @property 514 def atomic(self) -> Self: 515 """ 516 @public 517 Returns a new `StringRegex` instance with *self* as the content of an *atomic group*. 518 519 ```py 520 from human_regex import StringRegex as Sre 521 522 sre = Sre("content").atomic 523 assert sre == "(?>content)" 524 ``` 525 """ 526 cls = type(self) 527 return cls.concatenate((cls.ATOMIC, self)).extension 528 529 def named(self, name: Text_Element) -> Self: 530 """ 531 @public 532 Returns a new `StringRegex` instance with *self* as the content of a group named *name*. 
533 534 ```py 535 from human_regex import StringRegex as Sre 536 537 word = Sre(r"\\w").one_or_more # \\w+ 538 burger = word.named("burger") 539 assert burger == r"(?P<burger>\\w+)" 540 extra = word.named("extra") 541 assert extra == r"(?P<extra>\\w+)" 542 543 sre = burger + " with " + extra 544 assert sre == r"(?P<burger>\\w+) with (?P<extra>\\w+)" 545 546 match = sre.match("quarterpounder with cheese") 547 assert match.group("burger") == "quarterpounder" 548 assert match.group("extra") == "cheese" 549 ``` 550 """ 551 cls = type(self) 552 label = cls.concatenate((cls.OPEN_NAME, name, cls.CLOSE_NAME)) 553 return cls.concatenate((label, self)).extension 554 555 @property 556 def backreference(self) -> Self: 557 """ 558 @public 559 Returns `StringRegex` instance with *self* as the name of the group being refered back to. 560 561 ```py 562 from human_regex import StringRegex as Sre 563 564 word = Sre(r"\\w").one_or_more # \\w+ 565 old_ruler = word.named("ruler") # (?P<ruler>\\w+) 566 new_ruler = Sre("ruler").backreference # (?P=ruler) 567 sre = Sre(" ").join(("The", old_ruler, "is dead, long live the", new_ruler.append("!"))) 568 569 assert sre == "The (?P<ruler>\\w+) is dead, long live the (?P=ruler)!" 570 text = "The king is dead, long live the king!" 571 assert sre.match(text) 572 ``` 573 """ 574 cls = type(self) 575 return cls.concatenate((cls.NAME_REFERENCE, self)).extension 576 577 @property 578 def comment(self) -> Self: 579 """ 580 @public 581 Returns a new `StringRegex` with *self* as a comment. 582 583 ```py 584 from human_regex import StringRegex as Sre 585 586 sre = Sre(r"0-9a-f").set + Sre("any hex digit").comment 587 assert sre == "[0-9a-f](?#any hex digit)" 588 ``` 589 """ 590 cls = type(self) 591 return cls.concatenate((cls.COMMENT, self)).extension 592 593 def followed_by(self, following: Text_Element) -> Self: 594 """ 595 @public 596 Returns a new `StringRegex` with *self* extended by *following* as the *positive lookahead assertion*. 
597 598 ```py 599 from human_regex import StringRegex as Sre 600 601 sre = Sre("Isaac ").followed_by("Asimov") 602 assert sre == "Isaac (?=Asimov)" 603 ``` 604 """ 605 cls = type(self) 606 follows = cls.concatenate((cls.FOLLOWED_BY, following)).extension 607 return cls.concatenate((self, follows)) 608 609 def not_followed_by(self, not_following: Text_Element) -> Self: 610 """ 611 @public 612 Returns a new `StringRegex` with *self* extended by *not_following* as the *negative lookahead assertion*. 613 614 ```py 615 from human_regex import StringRegex as Sre 616 617 sre = Sre("Isaac ").not_followed_by("Asimov") 618 assert sre == "Isaac (?!Asimov)" 619 ``` 620 """ 621 cls = type(self) 622 does_not_follow = cls.concatenate((cls.NOT_FOLLOWED_BY, not_following)).extension 623 return cls.concatenate((self, does_not_follow)) 624 625 def preceded_by(self, preceding: Text_Element) -> Self: 626 """ 627 @public 628 Returns a new `StringRegex` with *self* extended by *preceding* as the *positive lookbehind assertion*. 629 630 ```py 631 from human_regex import StringRegex as Sre 632 633 sre = Sre("chat").preceded_by("chit") 634 assert sre == "(?<=chit)chat" 635 ``` 636 """ 637 cls = type(self) 638 precedes = cls.concatenate((cls.PRECEDED_BY, preceding)).extension 639 return cls.concatenate((precedes, self)) 640 641 def not_preceded_by(self, not_preceding: Text_Element) -> Self: 642 """ 643 @public 644 Returns a new `StringRegex` with *self* extended by *not_preceding* as the *negative lookbehind assertion*. 
645 646 ```py 647 from human_regex import StringRegex as Sre 648 649 sre = Sre("chat").not_preceded_by("chit") 650 assert sre == "(?<!chit)chat" 651 ``` 652 """ 653 cls = type(self) 654 does_not_precede = cls.concatenate((cls.NOT_PRECEDED_BY, not_preceding)).extension 655 return cls.concatenate((does_not_precede, self)) 656 657 @classmethod 658 def yes_no(cls, id_name: int | Text_Element, yes: Text_Element, no: Text_Element | None = None) -> Self: 659 """ 660 @public 661 Constructs the *yes-no-pattern* which will match with *yes*-pattern 662 if the group with given *id_name* exists, and with *no*-pattern if it doesn't. 663 *no*-pattern is optional and can be omitted. *id_name* can be the number 664 of the group or the name of the group if the group was named. 665 666 Example: Recreating the expression `(<)?(\\w+@\\w+(?:\\.\\w+)+)(?(1)>|$)` 667 from the built-in documentation of [re](https://docs.python.org/library/re.html) 668 for a poor email matching pattern, which will match with `<user@host.com>` as well as 669 `user@host.com`, but not with `<user@host.com` nor `user@host.com>`: 670 671 ```py 672 from human_regex import StringRegex as Sre 673 674 word = Sre(r"\\w").one_or_more # \\w+ 675 mail_core = ( 676 word + 677 "@" + 678 word + 679 word.prepend(r"\\.").no_capture.one_or_more 680 ).unnamed # (\\w+@\\w+(?:\\.\\w+)+) 681 maybe_less_than = Sre("<").unnamed.optional # (<)? 
682 maybe_greater_than = Sre.yes_no(1, ">", "$") # (?(1)>|$) 683 mail_re = maybe_less_than + mail_core + maybe_greater_than 684 685 assert mail_re == r"(<)?(\\w+@\\w+(?:\\.\\w+)+)(?(1)>|$)" 686 ``` 687 """ 688 id_name = cls._convert_to_bytes_or_string(id_name) if isinstance(id_name, int) else id_name 689 result = cls.concatenate((cls(id_name).unnamed, yes)) 690 if no is not None: 691 result += cls.concatenate((cls.OR, no)) 692 return result.extension 693 694 @classmethod 695 def _convert_to_bytes_or_string(cls, i: int) -> Text_Element: 696 i = str(i) 697 if bytes in cls.__mro__: 698 i = i.encode() 699 return i 700 701 @property 702 def set(self) -> Self: 703 """ 704 @public 705 Returns a new `StringRegex` for a set of *self*. 706 707 ```py 708 from human_regex import StringRegex as Sre 709 710 hex_digits = Sre("a-f0-9").set 711 assert hex_digits == "[a-f0-9]" 712 ``` 713 """ 714 cls = type(self) 715 return cls.concatenate((cls.OPEN_CHAR_SET, self, cls.CLOSE_CHAR_SET)) 716 717 @property 718 def optional(self) -> Self: 719 """ 720 @public 721 Returns a new `StringRegex` with `?` appended to *self*. 722 723 ```py 724 from human_regex import StringRegex as Sre 725 726 character = Sre(r"\\w") 727 optional_character = character.optional 728 assert optional_character == r"\\w?" 729 ``` 730 """ 731 cls = type(self) 732 return cls.concatenate((self, cls.OPTIONAL)) 733 734 @property 735 def zero_or_more(self) -> Self: 736 """ 737 @public 738 Returns a new `StringRegex` with `*` appended to *self*. 739 740 ```py 741 from human_regex import StringRegex as Sre 742 743 digit = Sre(r"\\d") 744 maybe_digits = digit.zero_or_more 745 assert maybe_digits == r"\\d*" 746 ``` 747 """ 748 cls = type(self) 749 return cls.concatenate((self, cls.ZERO_OR_MORE)) 750 751 @property 752 def one_or_more(self) -> Self: 753 """ 754 @public 755 Returns a new `StringRegex` with `+` appended to *self*. 
756 757 ```py 758 from human_regex import StringRegex as Sre 759 760 digit = Sre(r"\\d") 761 some_digits = digit.one_or_more 762 assert some_digits == r"\\d+" 763 ``` 764 """ 765 cls = type(self) 766 return cls.concatenate((self, cls.ONE_OR_MORE)) 767 768 @property 769 def lazy(self) -> Self: 770 """ 771 @public 772 Returns a new `StringRegex` with `?` appended to *self*. 773 774 ```py 775 from human_regex import StringRegex as Sre 776 777 everything = Sre(".*") 778 assert everything.lazy == ".*?" 779 780 tab = Sre(r"\\t") 781 everything_before_first_tab = everything.prepend("^").lazy.named("before_tab").append(tab) 782 assert everything_before_first_tab == r"(?P<before_tab>^.*?)\\t" 783 ``` 784 """ 785 cls = type(self) 786 return cls.concatenate((self, cls.LAZY)) 787 788 def repeat(self, minimum, maximum, /) -> Self: 789 """ 790 @public 791 Returns a new `StringRegex` with a greedy quantifier appended to *self*. 792 *minimum* and *maximum* specify limits of repetition. *maximum* is optional 793 794 ```py 795 from human_regex import StringRegex as Sre 796 797 two_or_more_As = Sre("A").repeat(2, None) 798 assert two_or_more_As == "A{2,}" 799 two_to_four_As = Sre("A").repeat(2, 4) 800 assert two_to_four_As == "A{2,4}" 801 up_to_four_As = Sre("A").repeat(None, 4) 802 assert up_to_four_As == "A{,4}" 803 ``` 804 """ 805 cls = type(self) 806 minimum = cls._convert_to_bytes_or_string(minimum) if minimum is not None else cls.EMPTY 807 maximum = cls._convert_to_bytes_or_string(maximum) if maximum is not None else cls.EMPTY 808 return cls.concatenate( 809 (self, cls.OPEN_QUANTIFIER, minimum, cls.QUANTIFIER_SEPARATOR, maximum, cls.CLOSE_QUANTIFIER) 810 ) 811 812 def exactly(self, number: int) -> Self: 813 """ 814 @public 815 Returns a new `StringRegex` with a fixed quantifier of *number* appended to *self*. 
816 817 ```py 818 from human_regex import StringRegex as Sre 819 820 three_As = Sre("A").exactly(3) 821 assert three_As == "A{3}" 822 ``` 823 """ 824 cls = type(self) 825 number = cls._convert_to_bytes_or_string(number) if number else cls.EMPTY 826 return cls.concatenate((self, cls.OPEN_QUANTIFIER, number, cls.CLOSE_QUANTIFIER))
Base class for human_regex.StringRegex
and human_regex.BytesRegex
.
@classmethod
def concatenate(cls, elements: Iterable[Text_Element]) -> Self:
    """
    Joins all items of *elements* with no separator between them and wraps
    the joined text in a new instance of *cls*.

    ```py
    from human_regex import StringRegex as Sre

    sre = Sre.concatenate(("Hello", " ", "world"))
    assert sre == Sre("Hello world")
    # the same result can be obtained with any of these:
    sre = Sre("".join(("Hello", " ", "world")))
    sre = Sre("").join(("Hello", " ", "world"))
    sre = Sre("Hello") + " " + "world"
    ```
    """
    # Join with the plain built-in type: `str` when the class derives from
    # `str`, `bytes` otherwise.
    base_type = str if issubclass(cls, str) else bytes
    glue = base_type(cls.EMPTY)
    return cls(glue.join(elements))
Concatenates items in elements. Returns StringRegex
instance of the joined string.
from human_regex import StringRegex as Sre
sre = Sre.concatenate(("Hello", " ", "world"))
assert sre == Sre("Hello world")
# equivalent to the following:
sre = Sre("".join(("Hello", " ", "world")))
sre = Sre("").join(("Hello", " ", "world"))
sre = Sre("Hello") + " " + "world"
def __add__(self, other: Text_Element) -> Self:
    """
    @public
    Implements the `+` operator. The result is a new `StringRegex` instance
    made of *self* immediately followed by *other*.

    ```py
    from human_regex import StringRegex as Sre

    sre1 = Sre("abc")
    sre2 = Sre("def")
    sre = sre1 + sre2
    assert sre == Sre("abcdef")

    # a plain string on the right-hand side works the same way:
    sre = Sre("abc") + "def"
    ```
    """
    return type(self).concatenate((self, other))
Support of the +
operator. Returns a new StringRegex
instance which has other
appended to the original StringRegex
instance.
from human_regex import StringRegex as Sre
sre1 = Sre("abc")
sre2 = Sre("def")
sre = sre1 + sre2
assert sre == Sre("abcdef")
# identical to:
sre = Sre("abc") + "def"
def __or__(self, other: Text_Element) -> Self:
    """
    @public
    Implements the `|` operator. The result is a new `StringRegex` instance
    made of *self*, the alternation symbol `|`, and *other*, in that order.

    *other* may be a plain string or another `StringRegex` instance, just
    like the right-hand operand of `+`.

    ```py
    from human_regex import StringRegex as Sre

    sre1 = Sre("abc")
    sre2 = Sre("def")
    sre = sre1 | sre2
    assert sre == Sre("abc|def")

    # identical to:
    sre = Sre("abc") | "def"
    ```
    """
    regex_type = type(self)
    # `OR` is the class-level alternation token placed between both operands.
    return regex_type.concatenate((self, regex_type.OR, other))
Support of the |
operator. Returns a new StringRegex
instance which is
the original instance joined with other using the |
symbol.
from human_regex import StringRegex as Sre
sre1 = Sre("abc")
sre2 = Sre("def")
sre = sre1 | sre2
assert sre == Sre("abc|def")
# identical to:
sre = Sre("abc") | "def"
def append(self, appendent: Text_Element) -> Self:
    """
    @public
    Builds a new `StringRegex` instance consisting of the original instance
    with *appendent* placed right after it.

    ```py
    from human_regex import StringRegex as Sre

    sre = Sre("pretty").append(" little").append(Sre(" angel"))
    assert sre == Sre("pretty little angel")
    ```
    """
    return type(self).concatenate((self, appendent))
Returns a new StringRegex
instance which is the original instance
followed by appendent.
from human_regex import StringRegex as Sre
sre = Sre("pretty").append(" little").append(Sre(" angel"))
assert sre == Sre("pretty little angel")
def prepend(self, prependent: Text_Element) -> Self:
    """
    @public
    Builds a new `StringRegex` instance in which *prependent* comes first
    and *self* follows it.

    This helps when an existing `StringRegex` instance serves as a building
    block of a larger `StringRegex` and plain `str` instances have to be
    placed in front of it.

    ```py
    from human_regex import StringRegex as Sre

    party = Sre("Party")
    # Intention: to add the strings "A Long-" and "Expected "
    # before the `party` instance.
    #
    # The built-in str does not have an .append method, so this will fail:
    # "A Long-".append("Expected ").append(party).
    #
    # If we use string addition, we would get a str instance
    # rather than a StringRegex instance:
    # s: str = "A Long-" + "Expected " + party
    #
    # To get a StringRegex instance, we can turn things around
    # and instead prepend normal str instances to a StringRegex:
    sre: Sre = party.prepend("Expected ").prepend("A Long-")
    assert sre == Sre("A Long-Expected Party")

    # Alternative: convert the starting element to StringRegex and add the rest:
    sre: Sre = Sre("A Long-") + "Expected " + party
    # Or concatenate:
    sre: Sre = Sre.concatenate(("A Long-", "Expected ", party))
    ```
    """
    return type(self).concatenate((prependent, self))
Returns a new StringRegex
instance with self
following the prependent.
This is useful when an already defined StringRegex
instance is used
as a building block of a more complex StringRegex
and we need
to add some str
instances before it.
from human_regex import StringRegex as Sre
party = Sre("Party")
# Intention: to add the strings "A Long-" and "Expected "
# before the `party` instance.
#
# The built-in str does not have an .append method, so this will fail:
# "A Long-".append("Expected ").append(party).
#
# If we use string addition, we would get a str instance
# rather than a StringRegex instance:
# s: str = "A Long-" + "Expected " + party
#
# To get a StringRegex instance, we can turn things around
# and instead prepend normal str instances to a StringRegex:
sre: Sre = party.prepend("Expected ").prepend("A Long-")
assert sre == Sre("A Long-Expected Party")
# Alternative: convert the starting element to StringRegex and add the rest:
sre: Sre = Sre("A Long-") + "Expected " + party
# Or concatenate:
sre: Sre = Sre.concatenate(("A Long-", "Expected ", party))
def join(self, elements: Iterable[Text_Element]) -> Self:
    # ruff: noqa: RUF002, E501
    """
    @public
    Works like `str.join` with *self* as the separator, but the result is
    an instance of `StringRegex`:

    ```py
    from human_regex import StringRegex as Sre

    sre = Sre(" ").join(("hello", "world"))
    assert sre == Sre("hello world")
    ```

    A more complex example: A suboptimal regular expression for
    parsing geographical longitude in the form *127° 36′ 52″ W*:

    ```py
    from human_regex import StringRegex as Sre

    coordinates = Sre(" ").join((
        Sre(r"\\d").repeat(1, 3).named("degrees").append("°"),
        Sre(r"\\d").repeat(1, 2).named("minutes").append("′"),
        Sre(r"\\d").repeat(1, 2).named("seconds").append("″"),
        Sre("EW").set.named("direction"),
    )).named("coordinates")
    assert coordinates == r"(?P<coordinates>(?P<degrees>\\d{1,3})° (?P<minutes>\\d{1,2})′ (?P<seconds>\\d{1,2})″ (?P<direction>[EW]))"
    ```
    (The resulting regular expression is suboptimal because it also matches
    degrees, minutes and seconds values which are illegal or not part of the
    longitude notation norm, e.g. `190° 78′ 93″ E` would be a match.)
    """
    regex_type = type(self)
    # Convert the separator to the plain built-in type before joining.
    separator = str(self) if issubclass(regex_type, str) else bytes(self)
    return regex_type(separator.join(elements))
Analogous to str.join
, but produces instances of StringRegex
:
from human_regex import StringRegex as Sre
sre = Sre(" ").join(("hello", "world"))
assert sre == Sre("hello world")
A more complex example: A suboptimal regular expression for parsing geographical longitude in the form 127° 36′ 52″ W:
from human_regex import StringRegex as Sre
coordinates = Sre(" ").join((
Sre(r"\d").repeat(1, 3).named("degrees").append("°"),
Sre(r"\d").repeat(1, 2).named("minutes").append("′"),
Sre(r"\d").repeat(1, 2).named("seconds").append("″"),
Sre("EW").set.named("direction"),
)).named("coordinates")
assert coordinates == r"(?P<coordinates>(?P<degrees>\d{1,3})° (?P<minutes>\d{1,2})′ (?P<seconds>\d{1,2})″ (?P<direction>[EW]))"
(The resulting regular expression is suboptimal because it would capture values of degrees, minutes, seconds which
are illegal or not part of the longitude notation norm, e.g. 190° 78′ 93″ E
would be a match.)
410 @property 411 def unnamed(self) -> Self: 412 """ 413 @public 414 Creates an unnamed group with the contents of *self*. 415 416 ```py 417 from human_regex import StringRegex as Sre 418 419 number = Sre(r"\\d").one_or_more.unnamed 420 assert number == "(\\d+)" 421 sre = number.prepend("My favorite number is ").append(r"\\.") 422 assert sre == "My favorite number is (\\d+)\\." 423 ``` 424 """ 425 cls = type(self) 426 return cls.concatenate((cls.OPEN_GROUP, self, cls.CLOSE_GROUP))
Creates an unnamed group with the contents of self.
from human_regex import StringRegex as Sre
number = Sre(r"\d").one_or_more.unnamed
assert number == "(\d+)"
sre = number.prepend("My favorite number is ").append(r"\.")
assert sre == "My favorite number is (\d+)\."
428 @property 429 def extension(self) -> Self: 430 """ 431 @public 432 Returns a new `StringRegex` instance with *self* inside the extension notation `(?...)`: 433 434 ```py 435 from human_regex import StringRegex as Sre 436 437 sre: Sre = Sre("something").extension 438 assert sre == "(?something)" 439 ``` 440 """ 441 cls = type(self) 442 return cls.concatenate((cls.OPEN_EXTENSION, self, cls.CLOSE_EXTENSION))
Returns a new StringRegex
instance with self inside the extension notation (?...)
:
from human_regex import StringRegex as Sre
sre: Sre = Sre("something").extension
assert sre == "(?something)"
@classmethod
def set_flags(cls, flags: Text_Element) -> Self:
    """
    @public
    Encodes regular expression flags into the expression string itself,
    as an alternative to passing them as the *flags* argument to
    `StringRegex.compile` or other methods.
    This should only be used at the start of a regular expression.

    Returns a `StringRegex` instance which sets the given *flags*. Flags can be one
    or more letters from the set `a`, `i`, `L`, `m`, `s`, `u`, `x`.
    Refer to the documentation of [re](https://docs.python.org/library/re.html),
    search there for "aiLmsux".

    ```py
    from human_regex import StringRegex as Sre

    # Create an expression with Sre.MULTILINE | Sre.IGNORECASE
    sre = Sre.set_flags("mi").append("match.this")
    assert sre == "(?mi)match.this"
    ```
    """
    flag_letters = cls(flags)
    return flag_letters.extension
A way of encoding regular expression flags into the expression string itself,
rather than passing it as flag argument to the StringRegex.compile
or other methods.
This should only be used at the start of a regular expression.
Returns a StringRegex
instance which sets the given flags. Flags can be one
or more letters from the set a
, i
, L
, m
, s
, u
, x
.
Refer to the documentation of re,
search there for "aiLmsux".
from human_regex import StringRegex as Sre
# Create an expression with Sre.MULTILINE | Sre.IGNORECASE
sre = Sre.set_flags("mi").append("match.this")
assert sre == "(?mi)match.this"
467 @property 468 def no_capture(self) -> Self: 469 """ 470 @public 471 Returns a new `StringRegex` with a non-capturing group made of *self*: 472 473 ```py 474 from human_regex import StringRegex as Sre 475 476 sre = Sre("match").no_capture 477 assert sre == "(?:match)" 478 ``` 479 """ 480 cls = type(self) 481 return cls.concatenate((cls.NO_CAPTURE, self)).extension
Returns a new StringRegex
with a non-capturing group made of self:
from human_regex import StringRegex as Sre
sre = Sre("match").no_capture
assert sre == "(?:match)"
def modify_flags(self, flags: Text_Element) -> Self:
    """
    @public
    Lets a part of a more complex expression use different flags than the rest.

    Returns a new `StringRegex` instance with *self* inside the
    modify-flags-extension, with flags modified as specified by the
    *flags* argument. Flags can be one
    or more letters from the set `a`, `i`, `L`, `m`, `s`, `u`, `x`, optionally
    followed by "-" followed by one or more letters from the `i`, `m`, `s`, `x` set.

    Refer to the documentation of [re](https://docs.python.org/library/re.html),
    search there for "aiLmsux-imsx".

    ```py
    from human_regex import StringRegex as Sre
    part1 = Sre.set_flags("mi").append("multiline.and.ignore.case.here")
    assert part1 == "(?mi)multiline.and.ignore.case.here"
    part2 = Sre("add.dot.all.but.do.NOT.ignore.case.HERE.and.no.multiline").modify_flags("s-im")
    assert part2 == "(?s-im:add.dot.all.but.do.NOT.ignore.case.HERE.and.no.multiline)"
    part3 = "again.multiline.and.ignore.case.here"
    sre = Sre.concatenate((part1, part2, part3))
    assert sre == "(?mi)multiline.and.ignore.case.here(?s-im:add.dot.all.but.do.NOT.ignore.case.HERE.and.no.multiline)again.multiline.and.ignore.case.here"
    ```
    """
    regex_type = type(self)
    # Order inside the extension: the flags, the flags terminator, the pattern.
    parts = (flags, regex_type.FLAGS_END, self)
    return regex_type.concatenate(parts).extension
Allows you to set different flags for a part of a more complex expression.
Returns a new StringRegex
instance with self inside the
modify-flags-extension with flags modified as specified by the
flags argument. Flags can be one
or more letters from the set a
, i
, L
, m
, s
, u
, x
, optionally
followed by "-" followed by one or more letters from the i
, m
, s
, x
set.
Refer to the documentation of re, search there for "aiLmsux-imsx".
from human_regex import StringRegex as Sre
part1 = Sre.set_flags("mi").append("multiline.and.ignore.case.here")
assert part1 == "(?mi)multiline.and.ignore.case.here"
part2 = Sre("add.dot.all.but.do.NOT.ignore.case.HERE.and.no.multiline").modify_flags("s-im")
assert part2 == "(?s-im:add.dot.all.but.do.NOT.ignore.case.HERE.and.no.multiline)"
part3 = "again.multiline.and.ignore.case.here"
sre = Sre.concatenate((part1, part2, part3))
assert sre == "(?mi)multiline.and.ignore.case.here(?s-im:add.dot.all.but.do.NOT.ignore.case.HERE.and.no.multiline)again.multiline.and.ignore.case.here"
513 @property 514 def atomic(self) -> Self: 515 """ 516 @public 517 Returns a new `StringRegex` instance with *self* as the content of an *atomic group*. 518 519 ```py 520 from human_regex import StringRegex as Sre 521 522 sre = Sre("content").atomic 523 assert sre == "(?>content)" 524 ``` 525 """ 526 cls = type(self) 527 return cls.concatenate((cls.ATOMIC, self)).extension
Returns a new StringRegex
instance with self as the content of an atomic group.
from human_regex import StringRegex as Sre
sre = Sre("content").atomic
assert sre == "(?>content)"
def named(self, name: Text_Element) -> Self:
    """
    @public
    Makes *self* the content of a group called *name* and returns it as a
    new `StringRegex` instance.

    ```py
    from human_regex import StringRegex as Sre

    word = Sre(r"\\w").one_or_more  # \\w+
    burger = word.named("burger")
    assert burger == r"(?P<burger>\\w+)"
    extra = word.named("extra")
    assert extra == r"(?P<extra>\\w+)"

    sre = burger + " with " + extra
    assert sre == r"(?P<burger>\\w+) with (?P<extra>\\w+)"

    match = sre.match("quarterpounder with cheese")
    assert match.group("burger") == "quarterpounder"
    assert match.group("extra") == "cheese"
    ```
    """
    regex_type = type(self)
    # First build the name label, then put the pattern right behind it.
    group_header = regex_type.concatenate((regex_type.OPEN_NAME, name, regex_type.CLOSE_NAME))
    labelled = regex_type.concatenate((group_header, self))
    return labelled.extension
Returns a new StringRegex
instance with self as the content of a group named name.
from human_regex import StringRegex as Sre
word = Sre(r"\w").one_or_more # \w+
burger = word.named("burger")
assert burger == r"(?P<burger>\w+)"
extra = word.named("extra")
assert extra == r"(?P<extra>\w+)"
sre = burger + " with " + extra
assert sre == r"(?P<burger>\w+) with (?P<extra>\w+)"
match = sre.match("quarterpounder with cheese")
assert match.group("burger") == "quarterpounder"
assert match.group("extra") == "cheese"
555 @property 556 def backreference(self) -> Self: 557 """ 558 @public 559 Returns `StringRegex` instance with *self* as the name of the group being refered back to. 560 561 ```py 562 from human_regex import StringRegex as Sre 563 564 word = Sre(r"\\w").one_or_more # \\w+ 565 old_ruler = word.named("ruler") # (?P<ruler>\\w+) 566 new_ruler = Sre("ruler").backreference # (?P=ruler) 567 sre = Sre(" ").join(("The", old_ruler, "is dead, long live the", new_ruler.append("!"))) 568 569 assert sre == "The (?P<ruler>\\w+) is dead, long live the (?P=ruler)!" 570 text = "The king is dead, long live the king!" 571 assert sre.match(text) 572 ``` 573 """ 574 cls = type(self) 575 return cls.concatenate((cls.NAME_REFERENCE, self)).extension
Returns StringRegex
instance with self as the name of the group being referred back to.
from human_regex import StringRegex as Sre
word = Sre(r"\w").one_or_more # \w+
old_ruler = word.named("ruler") # (?P<ruler>\w+)
new_ruler = Sre("ruler").backreference # (?P=ruler)
sre = Sre(" ").join(("The", old_ruler, "is dead, long live the", new_ruler.append("!")))
assert sre == "The (?P<ruler>\w+) is dead, long live the (?P=ruler)!"
text = "The king is dead, long live the king!"
assert sre.match(text)
577 @property 578 def comment(self) -> Self: 579 """ 580 @public 581 Returns a new `StringRegex` with *self* as a comment. 582 583 ```py 584 from human_regex import StringRegex as Sre 585 586 sre = Sre(r"0-9a-f").set + Sre("any hex digit").comment 587 assert sre == "[0-9a-f](?#any hex digit)" 588 ``` 589 """ 590 cls = type(self) 591 return cls.concatenate((cls.COMMENT, self)).extension
Returns a new StringRegex
with self as a comment.
from human_regex import StringRegex as Sre
sre = Sre(r"0-9a-f").set + Sre("any hex digit").comment
assert sre == "[0-9a-f](?#any hex digit)"
def followed_by(self, following: Text_Element) -> Self:
    """
    @public
    Appends *following* to *self* as a *positive lookahead assertion* and
    returns the result as a new `StringRegex`.

    ```py
    from human_regex import StringRegex as Sre

    sre = Sre("Isaac ").followed_by("Asimov")
    assert sre == "Isaac (?=Asimov)"
    ```
    """
    regex_type = type(self)
    lookahead = regex_type.concatenate((regex_type.FOLLOWED_BY, following)).extension
    # The assertion goes after the pattern it constrains.
    return regex_type.concatenate((self, lookahead))
Returns a new StringRegex
with self extended by following as the positive lookahead assertion.
from human_regex import StringRegex as Sre
sre = Sre("Isaac ").followed_by("Asimov")
assert sre == "Isaac (?=Asimov)"
def not_followed_by(self, not_following: Text_Element) -> Self:
    """
    @public
    Appends *not_following* to *self* as a *negative lookahead assertion*
    and returns the result as a new `StringRegex`.

    ```py
    from human_regex import StringRegex as Sre

    sre = Sre("Isaac ").not_followed_by("Asimov")
    assert sre == "Isaac (?!Asimov)"
    ```
    """
    regex_type = type(self)
    negative_lookahead = regex_type.concatenate((regex_type.NOT_FOLLOWED_BY, not_following)).extension
    # The assertion goes after the pattern it constrains.
    return regex_type.concatenate((self, negative_lookahead))
Returns a new StringRegex
with self extended by not_following as the negative lookahead assertion.
from human_regex import StringRegex as Sre
sre = Sre("Isaac ").not_followed_by("Asimov")
assert sre == "Isaac (?!Asimov)"
def preceded_by(self, preceding: Text_Element) -> Self:
    """
    @public
    Puts *preceding* in front of *self* as a *positive lookbehind assertion*
    and returns the result as a new `StringRegex`.

    ```py
    from human_regex import StringRegex as Sre

    sre = Sre("chat").preceded_by("chit")
    assert sre == "(?<=chit)chat"
    ```
    """
    regex_type = type(self)
    lookbehind = regex_type.concatenate((regex_type.PRECEDED_BY, preceding)).extension
    # The assertion goes before the pattern it constrains.
    return regex_type.concatenate((lookbehind, self))
Returns a new StringRegex
with self extended by preceding as the positive lookbehind assertion.
from human_regex import StringRegex as Sre
sre = Sre("chat").preceded_by("chit")
assert sre == "(?<=chit)chat"
def not_preceded_by(self, not_preceding: Text_Element) -> Self:
    """
    @public
    Puts *not_preceding* in front of *self* as a *negative lookbehind assertion*
    and returns the result as a new `StringRegex`.

    ```py
    from human_regex import StringRegex as Sre

    sre = Sre("chat").not_preceded_by("chit")
    assert sre == "(?<!chit)chat"
    ```
    """
    regex_type = type(self)
    negative_lookbehind = regex_type.concatenate((regex_type.NOT_PRECEDED_BY, not_preceding)).extension
    # The assertion goes before the pattern it constrains.
    return regex_type.concatenate((negative_lookbehind, self))
Returns a new StringRegex
with self extended by not_preceding as the negative lookbehind assertion.
from human_regex import StringRegex as Sre
sre = Sre("chat").not_preceded_by("chit")
assert sre == "(?<!chit)chat"
@classmethod
def yes_no(cls, id_name: int | Text_Element, yes: Text_Element, no: Text_Element | None = None) -> Self:
    """
    @public
    Builds the *yes-no-pattern*: it matches with the *yes*-pattern
    if the group with the given *id_name* exists, and with the *no*-pattern if it doesn't.
    The *no*-pattern is optional and can be omitted. *id_name* can be the number
    of the group, or the name of the group if the group was named.

    Example: Recreating the expression `(<)?(\\w+@\\w+(?:\\.\\w+)+)(?(1)>|$)`
    from the built-in documentation of [re](https://docs.python.org/library/re.html)
    for a poor email matching pattern, which will match with `<user@host.com>` as well as
    `user@host.com`, but not with `<user@host.com` nor `user@host.com>`:

    ```py
    from human_regex import StringRegex as Sre

    word = Sre(r"\\w").one_or_more  # \\w+
    mail_core = (
        word +
        "@" +
        word +
        word.prepend(r"\\.").no_capture.one_or_more
    ).unnamed  # (\\w+@\\w+(?:\\.\\w+)+)
    maybe_less_than = Sre("<").unnamed.optional  # (<)?
    maybe_greater_than = Sre.yes_no(1, ">", "$")  # (?(1)>|$)
    mail_re = maybe_less_than + mail_core + maybe_greater_than

    assert mail_re == r"(<)?(\\w+@\\w+(?:\\.\\w+)+)(?(1)>|$)"
    ```
    """
    # A numeric group id has to become text before it can be concatenated.
    if isinstance(id_name, int):
        id_name = cls._convert_to_bytes_or_string(id_name)
    pattern = cls.concatenate((cls(id_name).unnamed, yes))
    if no is None:
        return pattern.extension
    pattern += cls.concatenate((cls.OR, no))
    return pattern.extension
Constructs the yes-no-pattern which will match with yes-pattern if the group with given id_name exists, and with no-pattern if it doesn't. no-pattern is optional and can be omitted. id_name can be the number of the group or the name of the group if the group was named.
Example: Recreating the expression (<)?(\w+@\w+(?:\.\w+)+)(?(1)>|$)
from the built-in documentation of re
for a poor email matching pattern, which will match with <user@host.com>
as well as
user@host.com
, but not with <user@host.com
nor user@host.com>
:
from human_regex import StringRegex as Sre
word = Sre(r"\w").one_or_more # \w+
mail_core = (
word +
"@" +
word +
word.prepend(r"\.").no_capture.one_or_more
).unnamed # (\w+@\w+(?:\.\w+)+)
maybe_less_than = Sre("<").unnamed.optional # (<)?
maybe_greater_than = Sre.yes_no(1, ">", "$") # (?(1)>|$)
mail_re = maybe_less_than + mail_core + maybe_greater_than
assert mail_re == r"(<)?(\w+@\w+(?:\.\w+)+)(?(1)>|$)"
701 @property 702 def set(self) -> Self: 703 """ 704 @public 705 Returns a new `StringRegex` for a set of *self*. 706 707 ```py 708 from human_regex import StringRegex as Sre 709 710 hex_digits = Sre("a-f0-9").set 711 assert hex_digits == "[a-f0-9]" 712 ``` 713 """ 714 cls = type(self) 715 return cls.concatenate((cls.OPEN_CHAR_SET, self, cls.CLOSE_CHAR_SET))
Returns a new StringRegex
for a set of self.
from human_regex import StringRegex as Sre
hex_digits = Sre("a-f0-9").set
assert hex_digits == "[a-f0-9]"
717 @property 718 def optional(self) -> Self: 719 """ 720 @public 721 Returns a new `StringRegex` with `?` appended to *self*. 722 723 ```py 724 from human_regex import StringRegex as Sre 725 726 character = Sre(r"\\w") 727 optional_character = character.optional 728 assert optional_character == r"\\w?" 729 ``` 730 """ 731 cls = type(self) 732 return cls.concatenate((self, cls.OPTIONAL))
Returns a new StringRegex
with ?
appended to self.
from human_regex import StringRegex as Sre
character = Sre(r"\w")
optional_character = character.optional
assert optional_character == r"\w?"
734 @property 735 def zero_or_more(self) -> Self: 736 """ 737 @public 738 Returns a new `StringRegex` with `*` appended to *self*. 739 740 ```py 741 from human_regex import StringRegex as Sre 742 743 digit = Sre(r"\\d") 744 maybe_digits = digit.zero_or_more 745 assert maybe_digits == r"\\d*" 746 ``` 747 """ 748 cls = type(self) 749 return cls.concatenate((self, cls.ZERO_OR_MORE))
Returns a new StringRegex
with *
appended to self.
from human_regex import StringRegex as Sre
digit = Sre(r"\d")
maybe_digits = digit.zero_or_more
assert maybe_digits == r"\d*"
751 @property 752 def one_or_more(self) -> Self: 753 """ 754 @public 755 Returns a new `StringRegex` with `+` appended to *self*. 756 757 ```py 758 from human_regex import StringRegex as Sre 759 760 digit = Sre(r"\\d") 761 some_digits = digit.one_or_more 762 assert some_digits == r"\\d+" 763 ``` 764 """ 765 cls = type(self) 766 return cls.concatenate((self, cls.ONE_OR_MORE))
Returns a new StringRegex
with +
appended to self.
from human_regex import StringRegex as Sre
digit = Sre(r"\d")
some_digits = digit.one_or_more
assert some_digits == r"\d+"
768 @property 769 def lazy(self) -> Self: 770 """ 771 @public 772 Returns a new `StringRegex` with `?` appended to *self*. 773 774 ```py 775 from human_regex import StringRegex as Sre 776 777 everything = Sre(".*") 778 assert everything.lazy == ".*?" 779 780 tab = Sre(r"\\t") 781 everything_before_first_tab = everything.prepend("^").lazy.named("before_tab").append(tab) 782 assert everything_before_first_tab == r"(?P<before_tab>^.*?)\\t" 783 ``` 784 """ 785 cls = type(self) 786 return cls.concatenate((self, cls.LAZY))
Returns a new StringRegex
with ?
appended to self, which makes a preceding quantifier lazy (non-greedy).
from human_regex import StringRegex as Sre
everything = Sre(".*")
assert everything.lazy == ".*?"
tab = Sre(r"\t")
everything_before_first_tab = everything.prepend("^").lazy.named("before_tab").append(tab)
assert everything_before_first_tab == r"(?P<before_tab>^.*?)\t"
788 def repeat(self, minimum, maximum, /) -> Self: 789 """ 790 @public 791 Returns a new `StringRegex` with a greedy quantifier appended to *self*. 792 *minimum* and *maximum* specify limits of repetition. *maximum* is optional 793 794 ```py 795 from human_regex import StringRegex as Sre 796 797 two_or_more_As = Sre("A").repeat(2, None) 798 assert two_or_more_As == "A{2,}" 799 two_to_four_As = Sre("A").repeat(2, 4) 800 assert two_to_four_As == "A{2,4}" 801 up_to_four_As = Sre("A").repeat(None, 4) 802 assert up_to_four_As == "A{,4}" 803 ``` 804 """ 805 cls = type(self) 806 minimum = cls._convert_to_bytes_or_string(minimum) if minimum is not None else cls.EMPTY 807 maximum = cls._convert_to_bytes_or_string(maximum) if maximum is not None else cls.EMPTY 808 return cls.concatenate( 809 (self, cls.OPEN_QUANTIFIER, minimum, cls.QUANTIFIER_SEPARATOR, maximum, cls.CLOSE_QUANTIFIER) 810 )
Returns a new StringRegex
with a greedy quantifier appended to self.
minimum and maximum specify the limits of repetition. Either one may be None to leave that bound open.
from human_regex import StringRegex as Sre
two_or_more_As = Sre("A").repeat(2, None)
assert two_or_more_As == "A{2,}"
two_to_four_As = Sre("A").repeat(2, 4)
assert two_to_four_As == "A{2,4}"
up_to_four_As = Sre("A").repeat(None, 4)
assert up_to_four_As == "A{,4}"
812 def exactly(self, number: int) -> Self: 813 """ 814 @public 815 Returns a new `StringRegex` with a fixed quantifier of *number* appended to *self*. 816 817 ```py 818 from human_regex import StringRegex as Sre 819 820 three_As = Sre("A").exactly(3) 821 assert three_As == "A{3}" 822 ``` 823 """ 824 cls = type(self) 825 number = cls._convert_to_bytes_or_string(number) if number else cls.EMPTY 826 return cls.concatenate((self, cls.OPEN_QUANTIFIER, number, cls.CLOSE_QUANTIFIER))
Returns a new StringRegex
with a fixed quantifier of number appended to self.
from human_regex import StringRegex as Sre
three_As = Sre("A").exactly(3)
assert three_As == "A{3}"