Python - 正则表达式

时间：2021-04-24 13:40:39 阅读：0 评论：0 收藏：0 [点我收藏+]
  1 """
  2 正则表达式：regular expression
  3 
  4 本质: 可看成一个模板，按照这个模板规则去匹配
  5 
  6 是一个比较强大的字符串处理工具
  7 
  8 应用场景：
  9 1》表单验证
 10 2》爬虫
 11 3》处理文本和数据
 12 
 13 导入模块：re
 14 
 15 match() 匹配
 16 result = re.match(正则表达式，要匹配的字符串，flags=0)  flags 为可选的匹配标志, 如 re.I(忽略大小写)、re.M(多行模式)、re.S(让 . 也匹配换行), 默认 0 表示不启用任何标志
 17 -> 尝试从字符串的起始位置去匹配，返回match对象。如果匹配不成功，就返回None
 18 -> 获取返回值的内容: result.group()
 19 ======================================
 20 
 21 """
 22 
 23 import re
 24 
 25 # res = re.match(‘noise‘, ‘noise-abc‘)
 26 # print(res.group())    # noise
 27 
 28 
 29 
 30 """
 31 1> .  匹配1个字符(除了\n)
 32 2> [] 匹配[]中列举的一个字符
 33 3> \d 匹配数字
 34 4> \D 匹配非数字
 35 5> \s 匹配空白字符，如空格、tab、换行符等
 36 6> \S 匹配非空白
 37 7> \w 匹配单词字符, 即 a-z, A-Z, 0-9 和下划线 _ (Python3 的 str 模式下还包括汉字等 Unicode 文字字符)
 38 8> \W 匹配非单词字符
 39 
 40 """
 41 
 42 
 43 import re
 44 
 45 # 1> .  匹配1个字符(除了\n)
 46 
 47 # res1 = re.match(‘.‘, ‘a‘)
 48 # print(res1.group())   # a
 49 #
 50 # res11 = re.match(‘t.o‘, ‘too‘)
 51 # print(res11.group())  # too
 52 
 53 # 2> [] 匹配[]中列举的一个字符
 54 
 55 # res2 = re.match(‘[hH]‘, ‘Hello world h‘)
 56 # print(res2.group())     # H
 57 #
 58 # res22 = re.match(‘[0123456789]hello python‘, ‘7hello python‘)
 59 # print(res22.group())    # 7hello python
 60 #
 61 # res222 = re.match(‘[0-9]hello python‘, ‘7hello python‘)
 62 # print(res222.group())    # 7hello python
 63 #
 64 # res2222 = re.match(‘[0-35-9]hello python‘, ‘7hello python‘)   # 不匹配4
 65 # print(res2222.group())    # 7hello python
 66 
 67 # 3> \d 匹配数字
 68 
 69 # res3 = re.match(‘today is date 22 ?‘, ‘today is date 22 ?‘)
 70 # print(res3.group())    # today is date 22 (模式末尾的 ? 是量词, 表示它前面的空格出现0次或1次, 并不匹配字面问号; 要匹配问号需写成 \?)
 71 #
 72 # res31 = re.match(‘today is date 2\d ?‘, ‘today is date 24 ?‘)
 73 # print(res31.group())    # today is date 24 (模式末尾的 ? 是量词, 表示它前面的空格出现0次或1次, 并不匹配字面问号; 要匹配问号需写成 \?)
 74 
 75 
 76 # 5> \s 匹配空白，即空格，tab
 77 # 6> \S 匹配非空白
 78 # res5 = re.match(‘\st‘, ‘ today is date 22 ?‘)
 79 # print(res5.group())    #  t(t前面有一个空格）
 80 
 81 # res5 = re.match(‘\S‘, ‘today is date 22 ?‘)
 82 # print(res5.group())    # t
 83 
 84 
 85 
 86 
 87 # * 匹配 0-任意次
 88 
 89 import re
 90 #
 91 # res = re.match(‘[A-Z][a-z]*‘, ‘Noise123‘)   # 第一个字符匹配大写字母, 其后匹配任意个(0个或多个)小写字母; * 表示前一个字符出现任意次
 92 # print(res.group())  # Noise
 93 
 94 # + 匹配前一个字符至少一次
 95 # res = re.match(‘[A-Za-z]+python‘, ‘worldpythonhello‘)   # python前面至少匹配一个字符
 96 # print(res.group())  # worldpython
 97 
 98 # ? 匹配前一个字符最多一次
 99 # res = re.match(‘[0-9]?[0-9]‘, ‘123456‘)   # 第一个[0-9]最多匹配一次(可以没有),第二个[0-9]必须匹配一个数字
100 # print(res.group())  # 12
101 
102 # {m} 匹配前一个字符m次
103 # 匹配6位的数字支付密码
104 
105 # res = re.match(‘[0-9]{6}‘, ‘123456789asdasd‘)   #
106 # print(res.group())  # 123456
107 
108 res = re.match(‘[a-zA-Z0-9_]{8,20}‘, ‘212asda424asda4xzc‘)   # 最小八位最大20位
109 print(res.group())  # 212asda424asda4xzc
110 
111 
112 # ^ 匹配字符串开头
113 
114 import re
115 
116 # res = re.match(‘^ab‘, ‘abcdef‘)
117 # print(res.group())  # ab
118 
119 # res = re.match(‘^[0-9]‘, ‘12ab‘)
120 # print(res.group())  # 1
121 
122 # res = re.match(‘[^0-9]‘, ‘12ab‘)
123 # print(res.group())  # AttributeError: ‘NoneType‘ object has no attribute ‘group‘
124 #
125 
126 """
127 小结:
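128 'abc' 表示字符串以'abc'开头才匹配成功(re.match 只从起始位置匹配; 想在任意位置查找请用 re.search);
129 '[abc]'表示以'a'或'b'或'c'其中一个字符开头就匹配成功
130 '^abc'表示以'abc'开头就匹配成功(在 re.match 中与 'abc' 效果相同)
131 '^[abc]'表示以'a'或'b'或'c'开头就匹配成功(在 re.match 中与 '[abc]' 效果相同)
132 '[^abc]'表示开头是'a''b''c'以外的字符,才能匹配成功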
133 """
134 
135 
136 # $ 匹配字符串结尾
137 
138 # res = re.match(‘[\w]*d$‘, ‘helloworld‘)     # 空格不在\w里面
139 # print(res.group())  # helloworld
140 
141 # res = re.match(‘[\w]*d$‘, ‘hello world‘)     # 空格不在\w里面
142 # print(res.group())  # AttributeError: ‘NoneType‘ object has no attribute ‘group‘
143 
144 # res = re.match(‘[0-9]?\d$‘, ‘10‘)
145 # print(res.group())  # 10
146 
147 # res = re.match(‘[0-9]?\d$‘, ‘108‘)  # ?前面最多一个字符,加上\d一个字符
148 # print(res.group())  # AttributeError: ‘NoneType‘ object has no attribute ‘group‘
149 
150 list1 = [‘mynextlife@163.com‘, ‘mynextlife@139.com‘, ‘mynextlife@169.com‘]
151 
152 # 匹配139结尾的邮箱 (注意: 模式里的 . 未转义, 能匹配任意字符; 严格匹配应写成 139\.com)
153 for i in list1:
154     res = re.match(‘[\w]*@139.com$‘, i)
155     # try:
156     #     print(‘matched  : {}‘.format(res.group()))
157     # except AttributeError:
158     #     print(f"unmatched: {i}")
159     if res:
160         print(‘matched  : {}‘.format(res.group()))
161     else:
162         print(f"unmatched: {i}")
163 
164 
165 
166 
167 
168 
169 
170 # | 或 :匹配左右任意一个表达式
171 
172 import re
173 
174 # res = re.match(‘[1-9]?\d$|123‘, ‘123‘)  # [1-9]?\d$ 最多两个字符,但是或|运算,用了123去匹配
175 # print(res.group())  # 123
176 
177 # res = re.match(‘[1-9]?\d$|\w*‘, ‘123youare‘)  # [1-9]?\d$ 最多匹配两个字符且要求到结尾, 此处匹配失败; 于是 | 右边的 \w* 匹配了多个单词字符
178 # print(res.group())  # 123youare
179 
180 # () 分组
181 # 139邮箱 11位手机号,并且第一位为1
182 
183 # res = re.match(‘1\d{10}@139.com$‘, ‘12345678901@139.com‘)
184 # print(res.group())    # 12345678901@139.com
185 
186 # res = re.match(‘\w{4,20}@(139|163|qq).com$‘, ‘12345678901@qq.com‘)
187 # print(res.group())  # 12345678901@qq.com
188 
189 
190 # 不以 4,7结尾的4位数字号码
191 #
192 # res = re.match('\d{3}[0-35689]$', '1235')  # 注意: 原写法 [^47] 还会匹配非数字字符, 且 re.match 必须传入待匹配的字符串
193 
194 
195 # 提取座机号,区号
196 # res = re.match(‘\d{3}-\d{7}‘, ‘110-1234567aaaaa‘)
197 # print(res.group())
198 
199 # res = re.match(‘(\d{3})-(\d{7})‘, ‘110-1234567aaaaa‘)
200 # print(res.group(0))   # 0 是全部匹配      110-1234567
201 # print(res.group(1))   # 1 是匹配第1个分组  110
202 # print(res.group(2))   # 2 是匹配第2个分组  1234567
Python - 正则表达式
标签：模块字符一个 span att 处理工具不成功 reg bcd
原文地址：https://www.cnblogs.com/noise/p/14695842.html
踩
(0)
评论一句话评论（0）
分享档案
更多>
2021年07月29日 (22)
2021年07月28日 (40)
2021年07月27日 (32)
2021年07月26日 (79)
2021年07月23日 (29)
2021年07月22日 (30)
2021年07月21日 (42)
2021年07月20日 (16)
2021年07月19日 (90)
2021年07月16日 (35)
周排行