re模块的应用

时间：2018-08-07 20:27:18 阅读：122 评论：0 收藏：0 [点我收藏+]

import re
# 正则表达式中的转义 :
# '\('  表示匹配小括号
# [()+*?/$.]   在字符组中一些特殊的字符会现出原形
# 所有的 \w \d \s(\n,\t, )  \W \D \S都表示它原本的意义
# [-] 减号只有写在字符组的首位(或末位)的时候才表示普通的减号
#    写在两个字符之间的时候表示范围,例如 [1-9]
#    如果想在中间位置匹配减号本身,需要转义: [1\-9]

# 在python中使用正则表达式
    # 转义符 : 正则中的转义符 \ 同时也是python字符串中的转义符,所以正则表达式建议写成原始字符串 r'...'
    # re模块
        # findall search match
# ret=re.findall("\d+","weiydga16278364ahdiui7733")
# print(ret)
# print(ret)
# ret1=re.search("\d+","weiydga16278364ahdiui7733")
# print(ret1)
# if ret1:print(ret1.group())
# ret2=re.match("\d+","weiydga16278364ahdiui7733")
# print(ret2)
# if ret2:print(ret2.group())


# sub subn split 函数的用法
# ret=re.sub("\d+","H","weiydga16278364ahdiui7733",1)
# print(ret)
# ret1=re.subn("\d+","H","weiydga16278364ahdiui7733")
# print(ret1)
# ret2=re.split("\d+","weiydga16278364ahdiui7733")
# print(ret2)

# compile finditer 函数的用法: compile 节省时间(正则只编译一次,可重复使用), finditer 节省空间(返回迭代器,不一次性生成全部结果)
# ret3=re.compile("\d+")
# ret4=ret3.search("weiydga16278364ahdiui7733")
# print(ret4.group())
# ret5=re.finditer("\d+","weiydga16278364ahdiui7733")
# for r in ret5:
#     print(r.group())
        # flags有很多可选值：compile中的参数
# re.I(IGNORECASE)忽略大小写，括号内是完整的写法
# re.M(MULTILINE)多行模式，改变^和$的行为
# re.S(DOTALL)点可以匹配任意字符，包括换行符
# re.L(LOCALE)做本地化识别的匹配，表示特殊字符集 \w, \W, \b, \B, \s, \S 依赖于当前环境，不推荐使用
# re.U(UNICODE) 使 \w \W \s \S \d \D 的匹配结果取决于unicode定义的字符属性。在python3中默认使用该flag
# re.X(VERBOSE)冗长模式，该模式下pattern字符串可以是多行的，忽略空白字符，并可以添加注释

# python中的正则表达式:
        # findall 会优先显示分组中的内容,要想取消分组优先,使用 (?:正则表达式)
# ret=re.compile('-0\.\d+|-[1-9]\d*(?:\.\d+)?')
# c1=ret.findall('-1asdada-200')
# print(c1)
# ret=re.compile('\d+(?:\.\d+)|(\d+)')
# ret1=ret.findall("1-20*(40.25-23)+25")
# print(ret1)
# ret1.remove("")
# print(ret1)

        # split 遇到分组 会保留分组内被切掉的内容
# ret=re.split("(\d+)","eyau123ausui234aips")#保留了数字
# print(ret)

        # search 如果search中有分组的话,通过group(n)就能够拿到group中的匹配的内容
#         匹配标签,匹配整数,数字匹配,爬虫
# ret=re.search("<(\w+)>\w+<(\w+)>\w+<(\w+)>","<a>asd<b>aasd<c>")
# print(ret.group())
# print(ret.group(1))
# print(ret.group(2))
# print(ret.group(3))
#       search的分组命名与引用: 1. 用 (?P<name>正则) 给分组命名,用 (?P=name) 引用该分组  2. 用 \1 按序号引用第1个分组
# ret=re.search("<(?P<name>\w+)>\w+</(?P=name)>","<h>adcf</h>")
# print(ret.group())
# print(ret.group("name"))

# ret=re.search(r"<(\w+)>\w+</(\1)>","<h>adcf</h>")
# print(ret.group())
# print(ret.group(1))

# ret=re.search("<(?P<name>\w+)>\w+<(?P<sex>\w+)>\w+<(?P<age>\w+)>","<a>asd<b>aasd<c>")
# print(ret.group())
# print(ret.group('name'))
# print(ret.group('sex'))
# print(ret.group('age'))

# 爬虫代码块:
# from urllib.request import urlopen
# # # 内置的包 来获取网页的源代码 字符串
# # res = urlopen('https://www.douban.com/')
# # print(res.read().decode('utf-8'))
# from urllib.request import urlopen
# res=urlopen("https://www.baidu.com/")
# print(res.read().decode("utf-8"))

# 分别使用while循环和for循环计算 1-3+5-7+9-11+...+97-99 的结果

# print(re.findall(r'\\c',"asnadb\c"))

re模块的应用

标签：位置 lib python3 表达推荐 com 完整表达式多行

原文地址：https://www.cnblogs.com/zhangdaye/p/9438862.html

踩

(0)

评论一句话评论（0）

分享档案

更多>

2021年07月29日 (22)
2021年07月28日 (40)
2021年07月27日 (32)
2021年07月26日 (79)
2021年07月23日 (29)
2021年07月22日 (30)
2021年07月21日 (42)
2021年07月20日 (16)
2021年07月19日 (90)
2021年07月16日 (35)

周排行