Article From:https://www.cnblogs.com/cheese320/p/9061551.html
#!/usr/bin/python
# -*- coding: utf-8 -*-

import re
# re.match only succeeds when the pattern matches at the very start of the
# string; it never scans forward.
a = re.compile("192").match("inet Address: 192.168.12.44")  # None: the string does not start with "192".
b = re.compile("inet").match("inet Address: 192.168.12.44")  # Match object with span=(0, 4), match='inet'.
print(a, b, sep="\n")
print(b.group(0))  # group(0) is the entire matched text.

# "." stands for any single character except "\n", so ".+" consumes the
# whole (single-line) string.
c = re.compile(".+").match("inet Address: 192.168.12.44")
print(c)


# "\w" matches one word character and "+" requires the preceding item to
# match at least once.  The pattern used to be written "/w?" (forward slash),
# which looks for a literal "/" and therefore never matched this string.
d = re.match(r"\w+", "inet Address: 192.168.12.44")
print(d)  # Match object with span=(0, 4), match='inet'


# "?" lets the preceding item match once or not at all, so at most one word
# character is consumed here.
d = re.match(r"\w?", "inet Address: 192.168.12.44")
print(d)  # Match object with span=(0, 1), match='i'

# re.search scans the string for the first location where the pattern
# matches; parentheses create capturing groups, returned by groups().
# Raw strings (r"...") are used so that "\d" reaches the regex engine as
# written instead of being an invalid string escape sequence.
e = re.search(r"(abc){2}a(123|456)c", "abcabca456c")
print(e.group())  # abcabca456c
print(e.groups())  # ('abc', '456')

# A quantifier placed outside a group, as in (\d){2}, repeats the group but
# only the last repetition is captured.
f = re.search(r"(\d\d)(\d\d)(\d){2}(\d){4}", "370218198709279908")
print(f.groups())  # ('37', '02', '8', '7')

# Putting the quantifier inside the group captures the whole run of digits.
h = re.search(r"(\d{2})(\d{2})(\d{2})(\d{4})(\d{4})", "370218198709279908")
print(h.groups())  # ('37', '02', '18', '1987', '0927')


# "\A" anchors the match at the very beginning of the string.  A raw string
# is used because "\A" is not a valid escape sequence in a normal literal.
i = re.search(r"\Aa", "abcabca456c")
print(i.group())  # a


# "^" also anchors at the beginning of the string; here it plays the same
# role as "\A".
g = re.search(r"^37", "370218198709279908")
print(g.group())  # 37


# groupdict() returns the named groups of a match as a dictionary.
# The character class must be the range [0-9]: the earlier [0,9] accepted
# only "0", "," or "9", so re.search found nothing, returned None, and
# calling .groupdict() on it raised AttributeError.
k = re.search(r"(?P<province>[0-9]{4})(?P<city>[0-9]{2})(?P<birthday>[0-9]{4})", "370218198709279908")
print(k.groupdict())  # {'province': '3702', 'city': '18', 'birthday': '1987'}


# Search for an IP address.
# Each quantifier is closed with "}": the earlier "{1,3)" left an unbalanced
# ")" in the pattern, which made the re module raise an error.
m = re.search(r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}", "inet Address: 192.168.12.44 ")
print(m)  # Match object with match='192.168.12.44'
n = re.search(r"(\d{1,3}\.){3}", "inet Address: 192.168.12.44 ")
print(n)  # Match object with match='192.168.12.'


# re.findall returns every non-overlapping match as a list of strings.
o = re.findall(r"\d+", "abcdfewage25435r3")
print(o)  # ['25435', '3']

# Runs of ASCII letters.  A stray "]" after the "+" used to demand a literal
# "]" in the text, so the result was always an empty list.
p = re.findall(r"[a-zA-Z]+", "abjvldAFEWdfh2894AJlfd89AHvhk")
print(p)  # ['abjvldAFEWdfh', 'AJlfd', 'AHvhk']

# "\D" matches any non-digit, so this yields the same runs as above.
q = re.findall(r"\D+", "abjvldAFEWdfh2894AJlfd89AHvhk")
print(q)  # ['abjvldAFEWdfh', 'AJlfd', 'AHvhk']


# re.sub replaces every match of the pattern with the replacement string.
# A raw string keeps "\d" from being treated as an invalid string escape.
r = re.sub(r"\d+", "|", "abjvldAFEWdfh2894AJlfd89AHvhk")  # Replace each run of digits with "|".
print(r)  # abjvldAFEWdfh|AJlfd|AHvhk


# re.split cuts the string at every match of the pattern; the pattern here
# is a single literal backslash (escaped once for Python, once for the regex).
s = re.compile("\\\\").split(r'c:\user\data\python35')
print(s)  # ['c:', 'user', 'data', 'python35']

 

Common regular expression symbols

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
'.'     Matches any character except \n by default. If the DOTALL flag is specified, it matches any character, including newlines.
'^'     Matches the start of the string. If the MULTILINE flag is specified, it also matches at the start of each line, e.g. re.search(r"^a", "\nabc\neee", flags=re.MULTILINE)
'$'     Matches the end of the string; with MULTILINE it also matches at the end of each line, so re.search("foo$", "bfoo\nsdfsf", flags=re.MULTILINE).group() works too
'*'     Matches the character before the * zero or more times, e.g. re.findall("ab*", "cabb3abcbbac") returns ['abb', 'ab', 'a']
'+'     Matches the previous character one or more times, e.g. re.findall("ab+", "ab+cd+abb+bba") returns ['ab', 'abb']
'?'     Matches the previous character once or zero times
'{m}'   Matches the previous character exactly m times
'{n,m}' Matches the previous character n to m times, e.g. re.findall("ab{1,3}", "abb abc abbcbbb") returns ['abb', 'ab', 'abb']
'|'     Matches either the left or the right alternative, e.g. re.search("abc|ABC", "ABCBabcCD").group() returns 'ABC'
'(...)' Group matching, e.g. re.search("(abc){2}a(123|456)c", "abcabca456c").group() returns 'abcabca456c'
 
 
'\A'    Matches only at the start of the string, e.g. re.search(r"\Aabc", "alexabc") finds no match
'\Z'    Matches the end of the string, similar to $
'\d'    Matches a digit 0-9
'\D'    Matches a non-digit
'\w'    Matches a word character [A-Za-z0-9_]
'\W'    Matches a non-word character, i.e. anything not in [A-Za-z0-9_]
'\s'    Matches whitespace characters such as \t, \n, \r, e.g. re.search(r"\s+", "ab\tc1\n3").group() returns '\t'
 
'(?P<name>...)' Named group matching, e.g. re.search("(?P<province>[0-9]{4})(?P<city>[0-9]{2})(?P<birthday>[0-9]{4})", "371481199306143242").groupdict("city") returns {'province': '3714', 'city': '81', 'birthday': '1993'}

  

The most commonly used matching syntax

1
2
3
4
5
re.match    Matches only at the start of the string
re.search   Matches anywhere in the string
re.findall  Returns all matches as the elements of a list
re.split    Splits the string into a list, using the matches as separators
re.sub      Matches and replaces

The backslash plague
As in most programming languages, regular expressions use "\" as the escape character, which can lead to a plague of backslashes. If you need to match a literal "\" in the text, the regular expression written in the programming language needs four backslashes, "\\\\": the first two and the last two are each turned into one backslash by the programming language, giving two backslashes, which the regular expression engine then interprets as a single literal backslash. Python's raw strings solve this problem nicely: the regular expression in this example can be written as r"\\". Similarly, "\\d", which matches a digit, can be written as r"\d". With raw strings you no longer need to worry about missing a backslash, and the expressions you write are more intuitive.

 

A few matching modes (flags) you only need to know briefly

1
2
3
re.I (re.IGNORECASE): Ignore case (the full name is given in parentheses; same below).
re.M (re.MULTILINE): Multi-line mode; changes the behavior of '^' and '$' (see above)
re.S (re.DOTALL): Dot-matches-all mode; changes the behavior of '.'

  

 

Link of this Article: Python regular

Leave a Reply

Your email address will not be published. Required fields are marked *