Python Regular Expressions
return to DevPython
Replace (Sub)
re.sub(pattern, repl, string[, count, flags]) (docs)
Basic alphanum-only example
> s = 'John!Paul/9{}George'
> re.sub('[^0-9a-zA-Z]', '_', s)
'John_Paul_9__George'
Replace extra spaces in XML
import re

# Matches any run of two or more whitespace characters (spaces, tabs,
# newlines). Written as a raw string so the backslash reaches the regex
# engine untouched.
re_p = r"""\s{2,}"""

# Sample document: the indentation supplies the multi-character whitespace
# runs that the pattern is meant to collapse.
xml1 = """
<outer>
    <inner attr="foo">
        some text
    </inner>
</outer>
"""

# Collapse every whitespace run to a single space, then remove the space
# that is left between two adjacent tags. A lone whitespace character
# (for example a single newline) does not match the pattern and is kept.
xml2 = re.sub(re_p, ' ', xml1).replace('> <', '><')

# Parenthesised form prints the same text under Python 2 and Python 3.
print(xml2)
Split
Split on One or More Spaces
>>> import re
>>> s = 'SAN DIEGO CA 92037 PAGE 2'
>>> re.split(r'\s+', s)
['SAN', 'DIEGO', 'CA', '92037', 'PAGE', '2']
Ungreedy Search
import re


def re_ungreedy(re_, s):
    """Search *s* using a template whose ``%s`` slots capture non-greedily.

    Every ``%s`` in *re_* is replaced by the non-greedy group ``(.*?)``;
    the rest of *re_* is used as a regular expression as-is (it is not
    escaped). Only the first match found by ``re.search`` is used.

    Args:
        re_: Regex template containing zero or more ``%s`` placeholders.
        s: The string to search.

    Returns:
        A tuple with the text captured by each placeholder in the first
        match, or an empty tuple when the pattern does not match.
    """
    r = re_.replace('%s', '(.*?)')
    match = re.search(r, s)
    if match is None:
        # re.search returns None on no match; avoid calling .groups() on it.
        return ()
    return match.groups()


re_ = "<h2>%s</h2>"
# re.search stops at the first match, so this yields ('one',) only.
H2List = re_ungreedy(re_, '<h2>one</h2><h2>two</h2>')
[There are no comments on this page]