标签:字符 span asd encoding baidu ^c 提取 style 特殊
# encoding:utf8
"""Worked examples of ``re.findall`` for the basic regex metacharacters.

Every ``print`` call is followed by a comment showing the list it outputs.
All patterns are raw strings so that backslashes reach the regex engine
unchanged instead of being interpreted as Python string escapes.
"""
import re

# findall: return every non-overlapping match as a list.
s = 'abcabbca3c'
print(re.findall(r'ab', s))  # ['ab', 'ab']

# .  matches any single character (except a newline).
s = 'abcabbca3c'
print(re.findall(r'a.c', s))  # ['abc', 'a3c']

# ?  the preceding character matches 0 or 1 times.
s = '2acbabcabbacc'
print(re.findall(r'ab?', s))  # ['a', 'ab', 'ab', 'a']

# +  the preceding character matches 1 or more times.
s = '2acbabcabbacc'
print(re.findall(r'ab+', s))  # ['ab', 'abb']

# *  the preceding character matches 0 or more times.
s = '2acbabcabbacc'
print(re.findall(r'ab*', s))  # ['a', 'ab', 'abb', 'a']

# ^  anchors the match to the START of the string.
s = 'acbabcacbbacc'
print(re.findall(r'^acb', s))  # ['acb']
print(re.findall(r'^abc', s))  # []

# $  anchors the match to the END of the string.
s = 'acbabcacbbacc'
print(re.findall(r'acc$', s))  # ['acc']
print(re.findall(r'abc$', s))  # []

# {}  explicit repetition count for the preceding character:
#     {3} exactly 3 times; {1,} same as +; {1,3} between 1 and 3 times.
s = '2aabcabbacabbbc'
print(re.findall(r'ab{2}', s))  # ['abb', 'abb']
print(re.findall(r'ab{1,}', s))  # ['ab', 'abb', 'abbb']
print(re.findall(r'ab{2,3}', s))  # ['abb', 'abbb']

# ()  groups the enclosed pattern so a quantifier applies to it as a unit.
#     When the pattern has ONE capturing group, findall returns the text
#     captured by the group (its last repetition), not the whole match.
#     Use a non-capturing group (?:...) to get the whole match instead.
s = '2aabcabbacabbbc'
# Zero or more repetitions: an empty match is reported at every position
# where 'ab' does not start, and the group is '' for those matches.
# ['', '', 'ab', '', 'ab', '', '', '', 'ab', '', '', '', '']
print(re.findall(r'(ab)*', s))
print(re.findall(r'(ab)+', s))  # one or more: ['ab', 'ab', 'ab']
s = 'abcababcababab'
print(re.findall(r'(ab)', s))  # ['ab', 'ab', 'ab', 'ab', 'ab', 'ab']
print(re.findall(r'(?:ab)+', s))  # whole matches: ['ab', 'abab', 'ababab']
print(re.findall(r'(?:ab)+', 'abababab'))  # whole match: ['abababab']
print(re.findall(r'(ab)+', 'abababab'))  # last captured group only: ['ab']

# |  alternation: alternatives are tried left to right at each position,
#    and the first one that matches wins (the right side is not tried).
s = 'abcdacd'
print(re.findall(r'abc|cd', s))  # ['abc', 'cd']
print(re.findall(r'ab|cd', s))  # ['ab', 'cd', 'cd']
print(re.findall(r'abc|abcd', 'abcdabc'))  # left side wins: ['abc', 'abc']

# []  character class: matches ONE character out of the listed set.
#     Metacharacters such as * are literal inside []; a leading ^ negates.
s = 'abcdabdacdbad'
print(re.findall(r'a[bc]d', s))  # ['abd', 'acd']
print(re.findall(r'a[b*]d', 'abcabdacca*da'))  # ['abd', 'a*d']

# A leading ^ inside [] means "any character NOT in the set".
print(re.findall(r'a[^cb]d', 'aedacdahdabdasd'))  # ['aed', 'ahd', 'asd']
print(re.findall(r'a[^a-z]c', 'asa2a3c4c'))  # ['a3c']
# Extract the innermost parenthesised expression: ['(6-2)']
print(re.findall(r'\([^()]+\)', '1+7-(3*(6-2)-4)'))

# Metacharacters such as * + ( ) . must be escaped with a backslash to be
# matched literally. A raw string (r'...') does NOT escape them by itself;
# it only stops Python from processing the backslash, so the regex engine
# receives it intact.
# Unescaped . matches any character: ['www.baidu', 'wwwabaidu']
print(re.findall(r'www.baidu', 'www.163www.baidu.comwwwabaidu'))
# Plain string: the backslash itself has to be doubled.
print(re.findall('www\\.baidu', 'www.163www.baidu.com'))  # ['www.baidu']
# Raw string: a single backslash is enough.
print(re.findall(r'www\.baidu', 'www.163www.baidu.com'))  # ['www.baidu']

# Ranges inside [], e.g. 0-9 or a-z.
print(re.findall(r'a[1-3]c', 'abca1caesa3cas'))  # ['a1c', 'a3c']
# ['abc', 'a1c', 'asc', 'aFc']
print(re.findall(r'a[1-3a-zA-Z]c', 'abca1caDsascaFc'))

# \d matches a digit ([0-9] for the ASCII input used here);
# \D matches any character that is not a digit.
# ['1', '2', '3', '3', '2', '3', '4', '5', '2']
print(re.findall(r'\d', 'ab123c.d32a+"3452d'))
print(re.findall(r'\d+', 'ab123c.d32a+"3452d'))  # ['123', '32', '3452']
# ['a', 'b', 'c', '.', 'd', 'a', '+', '"', 'd']
print(re.findall(r'\D', 'ab123c.d32a+"3452d'))
print(re.findall(r'\D+', 'ab123c.d32a+"3452d'))  # ['ab', 'c.d', 'a+"', 'd']

# \w matches a word character: [0-9a-zA-Z_] plus, for str patterns, other
# Unicode word characters such as Chinese; \W matches everything else.
# ['ab12我3_c', 'd3是2a', '34_52d']
print(re.findall(r'\w+', 'ab12我3_c.d3是2a+"34_52d'))
print(re.findall(r'\W+', 'ab12我3_c.d3是2a+"34_52d'))  # ['.', '+"']
标签:字符 span asd encoding baidu ^c 提取 style 特殊
原文地址:https://www.cnblogs.com/wjlv/p/11011088.html