《Linux Shell脚本攻略》 笔记
第四章:高效文本处理
1、IP地址的正则表达式: [0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}
2、grep用法
//在多级目录中对文本进行递归检索
[root@localhost program_test]# grep "yang" ./ -Rn
./test.txt:6:laoyang
./right.txt:1:1 yang man
//忽略大小写匹配
[root@localhost program_test]# echo hello world | grep -i "HELLO"
hello world
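//递归搜索所有.c和.cpp文件(注意:未加引号的 *.{c,cpp} 会先被shell展开为 --include *.c *.cpp 并做文件名通配,结果只有第一个文件名成为--include的模式,其余被当作待搜索的文件;更稳妥的写法是 --include="*.c" --include="*.cpp")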
[root@localhost program_test]# grep "main()" . -r --include *.{c,cpp}
./hello.c:int main()
sin.c:int main()
hello.cpp:int main()
//匹配某个结果之后的几行
[root@localhost program_test]# echo -e "a\nb\nc\na\nb\nc"| grep a -A 1
a
b
--
a
b
3、cut命令
cut,将文本按照列进行切割的小工具。
//-d分界符; -f要提取的列; --complement取补集(输出除-f指定列之外的所有列)
[root@localhost program_test]# cut -d ":" -f5 --complement passwd_yang
root:x:0:0:/root:/bin/bash
bin:x:1:1:/bin:/sbin/nologin
[root@localhost program_test]# cut -c1-5 passwd_yang
root:
bin:x
daemo
adm:x
//统计特定文件中的词频
[root@localhost program_test]# cat word_freq.sh
#!/bin/bash
# word_freq.sh - count how many times each word occurs in a text file.
#
# Usage:  word_freq.sh filename
# Output: a "word / Count" header followed by one line per distinct word.
#         Matching is case-sensitive. awk's for-in iteration order is
#         unspecified, so pipe the result through sort for a stable order.
# Exit:   1 when the argument count is wrong (the former "exit -1" is not a
#         valid status and was reported as 255).

# Print the word-frequency table for the file named by $1.
# Returns 1, after printing the usage line to stderr, unless exactly one
# argument is supplied.
word_freq() {
  if [ $# -ne 1 ]; then
    echo "Usage: $0 filename" >&2
    return 1
  fi

  local filename=$1

  # grep -E replaces the deprecated egrep; -o emits every match on its own
  # line. "--" and the quotes keep odd file names (spaces, leading dash,
  # glob characters) from being misparsed.
  grep -E -o "\b[[:alpha:]]+\b" -- "$filename" |
    awk '{ count[$0]++ }
         END {
           printf("%-14s%s\n", "word", "Count")
           for (ind in count) {
             printf("%-14s%d\n", ind, count[ind])
           }
         }'
}

word_freq "$@"
4、sed命令(stream editor 流编辑器)
适用于文本处理。
//1.替换,从第3个开始替换
[root@localhost program_test]# echo this thisthisthis | sed 's/this/THIS/3g'
this thisTHISTHIS
//2.删掉空白行
[root@localhost program_test]# sed '/^$/d' choice.sh
//3.已匹配的字符串标记&
[root@localhost program_test]# echo this is an example | sed 's/\w\+/[&]/g'
[this] [is] [an] [example]
//4.替换举例.
[root@localhost program_test]# cat sed_data.txt
11 abc 111 this 9 file contains 111 11 88 numbers 0000
[root@localhost program_test]# cat sed_data.txt | sed 's/\b[0-9]\{3\}\b/NUMBER3/g'
11 abc NUMBER3 this 9 file contains NUMBER3 11 88 numbers 0000
5、awk工具,用于数据流,对列、行进行操作。
//1)、awk的实现方式
[root@localhost program_test]# echo -e "line1\nline2" | awk 'BEGIN { print "begin...\n" } { print } END { print "end...\n" }'
begin...
line1
line2
end...
//2)、awk实现累加求和
[root@localhost program_test]# seq 5 | awk 'BEGIN { sum=0; print "summary:" } { print $1"+"; sum+=$1; } END { print "=="sum }'
summary:
1+
2+
3+
4+
5+
==15
//3)、awk 设定定界符.
//-F 定界符 $NF 一行中的最后一个字段
[root@localhost program_test]# awk -F: '{ print $1 "\t" $NF }' /etc/passwd
root /bin/bash
bin /sbin/nologin
daemon /sbin/nologin
//4)、打印文件中的每个字母
[root@localhost program_test]# cat read_each_word.sh
#!/bin/bash
# read_each_word.sh - print every non-whitespace character of a file,
# one character per line.
#
# Usage: read_each_word.sh [file]    (file defaults to hello.c)

# Print each character of each whitespace-separated word in the file $1.
print_chars() {
  local line word i
  local -a words

  # IFS= and -r keep backslashes and whitespace intact; the "|| [[ -n ... ]]"
  # clause still processes a last line that lacks a trailing newline.
  while IFS= read -r line || [[ -n "$line" ]]; do
    # Split on whitespace into an array instead of an unquoted $line, so
    # characters such as * in "char *p" are not expanded to file names.
    read -r -a words <<< "$line"
    for word in "${words[@]}"; do
      for ((i = 0; i < ${#word}; i++)); do
        printf '%s\n' "${word:i:1}"
      done
    done
  done < "$1"
}

print_chars "${1:-hello.c}"
//5)、打印第4-6行内容
[root@localhost program_test]# seq 100 | awk 'NR==4, NR==6'
4
5
6
//6)、awk实现类似tac逆序的功能.
[root@localhost program_test]# seq 9 | awk '{ lifo[NR]=$0; lno=NR } END { print "NR = " NR; for(;lno>0;lno--) { print lifo[lno]; } }'
NR = 9
9
8
7
6
5
4
3
2
1