#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""String-handling examples: a tour of common ``str`` and ``re`` operations.

File:   字符串处理.py
Author: liu yang
Email:  liuyang0001@outlook.com
Blog:   www.cnblogs.com/liu66blog

Each numbered section below is a small, self-contained demonstration that
prints its result; the expected output is recorded in the comment that
follows it.
"""
import os
import re
import sys
from collections import Counter

# 1. Concatenating and joining strings
# Concatenation: two strings can be joined directly with '+'.
str1 = '123'
str2 = '456'
str3 = str1 + str2
print(str3)
# ----------- Output ------------------
# 123456
# ------------------------------------

# Joining: use str.join to glue a sequence of strings with a separator.
url = ['www', 'cnblog', 'com/liu66blog']
print('.'.join(url))
# ----------- Output ------------------
# www.cnblog.com/liu66blog
# ------------------------------------

# 2. Repeating and slicing strings
# Repetition: handy for building a separator line.
Separator = '*' * 30
print(Separator)
# ----------- Output ------------------
# ******************************
# ------------------------------------

# Slicing
url = 'www.cnblogs.com/liu66blog'
# Characters at indexes 0 through 15.
print(url[0:16])
# From index 16 to the end.
print(url[16:])
# The last four characters.
print(url[-4:])
# A full slice yields an equal string.
print(url[::])
# ----------- Output ------------------
# www.cnblogs.com/
# liu66blog
# blog
# www.cnblogs.com/liu66blog
# ------------------------------------

# 3. Splitting strings
# Simple splitting with str.split: it accepts only one literal separator.
url = 'www.cnblogs.com/liu66blog'
url_list = url.split('.')
print(url_list)
# ----------- Output ------------------
# ['www', 'cnblogs', 'com/liu66blog']
# ------------------------------------

# Splitting on several separators with re.split.
# The raw-string pattern matches one of '.', ';' or '/' followed by zero or
# more whitespace characters, and the string is split at every match.
url = 'www.cnblogs.com/liu66blog'
url_list = re.split(r'[.;/]\s*', url)
print(url_list)
# ----------- Output ------------------
# ['www', 'cnblogs', 'com', 'liu66blog']
# ------------------------------------

# 4. Checking the start and end of a string
# For example, testing whether a name begins or ends with a given text.
url = 'www.cnblogs.com/liu66blog'
result = url.endswith('blog')
print(result)
result = url.startswith('ww.')
print(result)
# ----------- Output ------------------
# True
# False
# ------------------------------------

# 5. Searching and matching
# Simple search: str.find returns the index of the first occurrence of the
# substring, or -1 when it is not found.
url = 'www.cnblogs.com/liu66blog'
result = url.find('liu66')
print(result)
result = url.find('liuyang')
print(result)
# ----------- Output ------------------
# 16
# -1
# ------------------------------------

# Pattern search: re.match anchors the pattern at the start of the string.
data_str = '2018/2/22'
result = re.match(r'\d+/\d+/\d+', data_str)
if result:
    print('ok,存在')
# ----------- Output ------------------
# ok,存在
# ------------------------------------

# 6. Replacing text
# Simple replacement: str.replace is enough.
url = 'www.cnblogs.com/liu66blog'
url_new = url.replace('www.', '')
print(url_new)
# ----------- Output ------------------
# cnblogs.com/liu66blog
# ------------------------------------

# Pattern replacement with re.sub: every pair of adjacent digits becomes '00'.
url = 'www.cnblogs.com/liu66blog'
url_new = re.sub(r'\d\d', '00', url)
print(url_new)
# ----------- Output ------------------
# www.cnblogs.com/liu00blog
# ------------------------------------

# 7. Removing characters
# Stripping whitespace: e.g. after reading a line from a file, remove the
# spaces, tabs and newlines on both sides.
url = ' www.cnblogs.com/liu66blog '
url_new = url.strip()
print(url_new)
# ----------- Output ------------------
# www.cnblogs.com/liu66blog
# ------------------------------------

# More involved clean-up can use str.translate with a translation table.
# Here the table maps lowercase 'w' to uppercase 'W'.
# string.maketrans() is gone in Python 3; use str.maketrans(),
# bytes.maketrans() or bytearray.maketrans() instead.
url = 'www.cnblogs.com/liu66blog'
# Build the translation table.
instr = 'w'
outstr = 'W'
table = str.maketrans(instr, outstr)
url_new = url.translate(table)
print(url_new)
# ----------- Output ------------------
# WWW.cnblogs.com/liu66blog
# ------------------------------------

# 8. Finding the longest word(s)
txt = (
    'Python is a programming language that lets you work more quickly and integrate your systems more effectively. '
    'You can learn to use Python and see almost immediate gains in productivity and lower maintenance costs. '
    'Learn more about Python..'
)
# Split on single spaces (punctuation stays attached to the words).
txt_list = txt.split(' ')
# Sort by word length, longest first; sorted() is stable, so words of equal
# length keep their original relative order.
txt_list_new = sorted(txt_list, key=len, reverse=True)
# Keep every word that ties with the longest one.
max_len = len(txt_list_new[0]) if txt_list_new else 0
longest_word = [word for word in txt_list_new if len(word) == max_len]
print(longest_word)
# ----------- Output ------------------
# ['effectively.', 'productivity']
# ------------------------------------

# 9. Finding words of a given length
len_4_word = filter(lambda word: len(word) == 4, txt_list)
# Note: in Python 3 filter() returns a lazy iterator, not a list, so it has
# to be materialised explicitly.
len_4_word_list = list(len_4_word)
# De-duplicate through a set, then freeze as a tuple (order is arbitrary).
len_4_word_tuple = tuple(set(len_4_word_list))
print(len_4_word_list)
print(len_4_word_tuple)
# ----------- Output (tuple order may vary) ---
# ['that', 'lets', 'work', 'more', 'your', 'more', 'more']
# ('your', 'more', 'lets', 'that', 'work')
# ------------------------------------

# 10. Most frequently used words
# most_common(n) lists the n most frequent items with their counts.
print(Counter(txt_list).most_common(6))
# ----------- Output ------------------
# [('more', 3), ('and', 3), ('Python', 2), ('is', 1), ('a', 1), ('programming', 1)]
# ------------------------------------

# 11. Listing all title-cased words (those for which str.istitle() is true)
title_words_list = [word for word in txt_list if word.istitle()]
# De-duplicate; despite the variable name this is a set, not a dict.
title_words_dict = set(title_words_list)
print(title_words_list)
print(title_words_dict)
# ----------- Output (set order may vary) ---
# ['Python', 'You', 'Python', 'Learn', 'Python..']
# {'Python..', 'Learn', 'Python', 'You'}
# ------------------------------------

# 12. To be continued...