标签:style blog ar color sp for on 数据 div
目前最流行的机器学习语言当属 Python,其两大库 NumPy 和 SciPy 十分强大。本人学习 Perl 的初衷是做爬虫,但做数据挖掘又免不了要用到科学计算,而 Perl 中没有像 NumPy 和 SciPy 这样强大的模块,相关功能零散地分布在许多模块中,所以只能自己写一些函数,以备自用。本人学习 Perl 大约只有 5 个月,以下代码难免存在不足之处,请大家多多见谅,也欢迎提出意见。
本节主要介绍以下几个功能:向量求和、均值、样本方差、样本标准差、相关系数、偏度系数和峰度系数。
use strict;
use warnings;

# Small collection of descriptive-statistics helpers for numeric vectors.
# Every function takes array reference(s) and returns a plain number.
# Each helper is preceded by a short usage example that prints its result.

# ex1: sum of a vector
my @ex1 = 1 .. 5;
print sum(\@ex1), "\n";

# sum(\@vec)
# Returns the sum of the elements of the referenced array.
# An empty vector sums to 0.
sub sum {
    my ($vec) = @_;
    my $total = 0;
    $total += $_ for @$vec;
    return $total;
}

# ex2: mean of a vector
my @ex2 = 1 .. 5;
print mean(\@ex2), "\n";

# mean(\@vec)
# Returns the arithmetic mean of the referenced array.
# Dies if the vector is empty (the mean is undefined).
sub mean {
    my ($vec) = @_;
    my $n = scalar @$vec;
    die "mean: vector must not be empty!" if $n == 0;
    return sum($vec) / $n;
}

# ex3: sample variance of a vector
my @ex3 = 1 .. 5;
print var(\@ex3), "\n";

# var(\@vec)
# Returns the sample variance, i.e. the sum of squared deviations from the
# mean divided by (n - 1).
# Dies if the vector has fewer than 2 elements (division by n - 1 = 0).
sub var {
    my ($vec) = @_;
    my $n = scalar @$vec;
    die "var: vector length must be at least 2!" if $n < 2;
    my $m = mean($vec);
    my @squares = map { my $d = $_ - $m; $d * $d } @$vec;
    return sum(\@squares) / ($n - 1);
}

# ex4: sample standard deviation of a vector
my @ex4 = 1 .. 5;
print sd(\@ex4), "\n";

# sd(\@vec)
# Returns the sample standard deviation: the square root of var(\@vec).
# Dies under the same conditions as var().
sub sd {
    my ($vec) = @_;
    return sqrt(var($vec));
}

# ex5: correlation coefficient of two vectors
my @ex51 = (2, 3, 4, 9, 10);
my @ex52 = 1 .. 5;
print corr(\@ex51, \@ex52), "\n";

# corr(\@v1, \@v2)
# Returns the sample correlation coefficient: the sum of products of paired
# deviations from the means, divided by sd(v1) * sd(v2) * (n - 1).
# Dies if the vectors differ in length, have fewer than 2 elements, or if
# either vector has zero variance (the coefficient is undefined).
sub corr {
    my ($v1, $v2) = @_;
    my $n = scalar @$v1;
    die "corr: vectors must have the same length!" if $n != scalar @$v2;

    # The means are loop-invariant, so compute them once up front.
    my $m1 = mean($v1);
    my $m2 = mean($v2);

    my $s = 0;
    for my $i (0 .. $n - 1) {
        $s += ($v1->[$i] - $m1) * ($v2->[$i] - $m2);
    }

    my $denom = sd($v1) * sd($v2) * ($n - 1);
    die "corr: vector has zero variance!" if $denom == 0;
    return $s / $denom;
}

# ex6: skewness coefficient
my @ex6 = (2, 3, 4, 9, 10, 12);
print skew(\@ex6), "\n";

# skew(\@vec)
# Returns n * sum((x - mean)^3) / ((n - 1) * (n - 2) * sd^3).
# Dies if the vector has fewer than 3 elements (the (n - 2) factor would be
# zero or negative) or if the vector has zero variance.
sub skew {
    my ($vec) = @_;
    my $n = scalar @$vec;
    die "vector length must be at least 3!" if $n < 3;

    my $m = mean($vec);
    my $s = sd($vec);    # computed once instead of once per factor
    die "skew: vector has zero variance!" if $s == 0;

    my @cubes = map { my $d = $_ - $m; $d * $d * $d } @$vec;
    return ($n * sum(\@cubes)) / (($n - 1) * ($n - 2) * $s * $s * $s);
}

# ex7: kurtosis coefficient
my @ex7 = (1, 9, 4, 9, 10, 12);
print kurt(\@ex7), "\n";

# kurt(\@vec)
# Returns sum((x - mean)^4) / ((n - 1) * sd^4).
# NOTE: 3 is not subtracted, so this is not "excess" kurtosis, and no further
# small-sample correction is applied.
# Dies if the vector has fewer than 2 elements (via sd) or zero variance.
sub kurt {
    my ($vec) = @_;
    my $n = scalar @$vec;
    my $m = mean($vec);
    my $s = sd($vec);    # computed once instead of once per factor
    die "kurt: vector has zero variance!" if $s == 0;

    my @fourths = map { my $d = $_ - $m; $d * $d * $d * $d } @$vec;
    return sum(\@fourths) / (($n - 1) * $s * $s * $s * $s);
}
标签:style blog ar color sp for on 数据 div
原文地址:http://www.cnblogs.com/datacatcher/p/4138976.html