Python Library: re (regex)

26 Jul 2022

New app:
"English to RegEx with Natural Language Processing"
"Regex is difficult to write and comprehend to the average human reader because of its complex patterns. This website uses GPT-3 to translate back and forth between English and RegEx"
https://www.autoregex.xyz/

apps_autoregex.jpg

Script examples

split on first uppercase letter in string

Given the following, I want to split each string on first uppercase letter to separate command and description (separated by a variable amount of spaces, no tab):

'afconvert Audio File Convert'
'afinfo    Audio File Info'
'afplay    Audio File Play'
'airport   Manage Apple AirPort'
'alias     Create an alias •'
'alloc     List used and free memory'
'apropos   Search the whatis database for strings'
'asr       Apple Software Restore'
'atsutil   Font registration system utility'

working code:

import re

# Split the first cell of every row into a command name and its description.
# The description starts at the FIRST uppercase letter and runs to the end of
# the string, so later capitals ("Audio File Convert") stay in the description.
# NOTE(review): `ws` and `count_row` are defined elsewhere; `row[0].value`
# suggests an openpyxl-style worksheet -- confirm.
for row in ws:
    count_row += 1
    first = row[0].value.strip()
    first_upper = re.search(r"[A-Z]", first)
    if first_upper:
        # Everything before the first capital is the command (padding removed).
        command = first[:first_upper.start()].strip()
        description = first[first_upper.start():]
    else:
        # No uppercase letter at all: treat the whole cell as the command
        # instead of raising IndexError.
        command, description = first, ""
    print(f"{command=}")
    print(f"{description=}")
    print()

regex email addresses

# Find the first e-mail-like substring in `email4` (defined elsewhere).
# Deliberately simple pattern: dot-separated word characters on both sides of
# the '@'. The local part must be non-empty, so a one-letter local part such as
# 'b@x.com' is matched in full. Written as a raw string so that \w and \. reach
# the regex engine untouched.
x = re.search(r'\w+(?:\.\w+)*@(?:\w+\.)*\w+', email4)
# re.search returns None when nothing matches, so guard before calling .group()
x.group() if x else None

# EXAMPLE:

# E-mail pattern, written as a raw string. The extraction below does not use
# it: addresses are taken from between '<' and '>' in each entry. It has its
# own name so the loop variable no longer overwrites it.
email_pattern = r'(\w+[.|\w])*@(\w+[.])*\w+'

listEmails = []

# Read the whole file once; entries are split on commas below.
with open('/path/to/folder/recipients.txt', 'r') as f:
    contents = f.read()

for entry in contents.split(","):
    # Skip entries that have no angle-bracketed part instead of raising
    # IndexError on the split below.
    if '<' not in entry:
        continue
    # Keep the text between the first '<' and the following '>'.
    address = entry.split('<', 1)[1].split('>', 1)[0]
    print(address)
    # Each address is stored as a one-element list, as in the original.
    listEmails.append([address])

# Print the collected addresses (not the builtin `list` type).
print(listEmails)

remove invisible characters

import re

## COLLAPSES RUNS OF NON-WORD CHARACTERS (spaces, punctuation, ...) INTO ONE SPACE

# sample string; per the original note it contains the \u2003 character
string = u'This is a   test string ’'
# \W+ matches each run of characters that are not letters, digits or
# underscore; every run becomes a single space and strip() trims both ends.
# The pattern is a raw string so the backslash reaches the regex engine as-is.
re.sub(r'\W+',' ',string).strip()

# PRESERVES SPACES:
# The class [^!-~ ] matches anything that is neither in the ASCII range '!'..'~'
# nor a plain space; each run of such characters is replaced by one space and
# the result is trimmed at both ends.
# NOTE(review): `country` is not defined in this snippet -- presumably the
# string being cleaned; confirm against the calling code.
re.sub('[^!-~ ]+',' ',country).strip()

capitalise sentence

25 Sep 2022

def capitalise_sentence(og_string, v=False):
    """Return og_string converted to sentence case.

    The text is lowercased first, then these characters are uppercased:
    the first character of the string, the first letter after ". ", "? " or
    "! " (unless the punctuation closes a dotted acronym), every letter of a
    dotted acronym such as "a.f.k.", and the standalone pronoun "i"
    (including forms like "i'm", "i've", and "i" before punctuation or at the
    end of the string).

    Args:
        og_string: the text to convert.
        v: when True, print each intermediate result for debugging.

    Returns:
        The converted string; leading/trailing whitespace is left untouched.
    """
    if v:
        print("---start verbose capitalise_sentence (deactivate with v=False)")
        print(f"\n{og_string=}")
    # lowercase everything
    lower_s = og_string.lower()
    if v:
        print(f"\n{lower_s=}")
    # start of string & acronyms (raw strings keep the regex escapes intact)
    sentence_case = (
        r"(\A\w)|"                  # start of string
        r"(?<!\.\w)([\.?!] )\w|"    # after a ?/!/. and a space,
                                    # but not after an acronym
        r"\w(?:\.\w)|"              # start/middle of acronym
        r"(?<=\w\.)\w"              # end of acronym
    )
    # Uppercasing the whole match is safe: punctuation and spaces are unchanged.
    final = re.sub(sentence_case, lambda m: m.group().upper(), lower_s)
    if v:
        print(f"\nstart_string {final=}")
    # I exception: a lone "i" preceded by whitespace (or the string start) and
    # followed by whitespace, an apostrophe, sentence punctuation or the end.
    # Lookarounds leave the neighbours unconsumed, so "i i i" is fully handled.
    with_pronoun = re.sub(r"(?<!\S)i(?=[\s'’.,;:!?]|\Z)", "I", final)
    if with_pronoun != final:
        final = with_pronoun
        if v:
            print(f"\n' i ' {final=}")
    if v:
        print(f"\nreturned repr(final)={repr(final)}\n\n---end verbose capitalise_sentence\n")
    return final

# Demo input: mixed casing, a dotted acronym, lowercase "i" pronouns and
# two trailing spaces.
s = "hey. how are you? i'm A.F.K. At the Moment but i don't Mind!  "

# v=True makes capitalise_sentence print each intermediate step.
test = capitalise_sentence(s, v=True)

# Print the final converted string.
print(test)

outputs:

---start verbose capitalise_sentence (deactivate with v=False)

og_string="hey. how are you? i'm A.F.K. At the Moment but i don't Mind!  "

lower_s="hey. how are you? i'm a.f.k. at the moment but i don't mind!  "

start_string final="Hey. How are you? I'm A.F.K. at the moment but i don't mind!  "

' i ' final="Hey. How are you? I'm A.F.K. at the moment but I don't mind!  "

returned repr(final)="Hey. How are you? I'm A.F.K. at the moment but I don't mind!  "

---end verbose capitalise_sentence

Hey. How are you? I'm A.F.K. at the moment but I don't mind!  

inspired by & tweaked from https://stackoverflow.com/questions/19785458/capitalization-of-sentences-in-python

links

social