标签:
例子:天气预报数据
> G=rep(c(1,2),c(10,10))
> G
[1] 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2
> x1=c(-1.9,-6.9,5.2,5.0,7.3,6.8,0.9,-12.5,1.5,3.8,0.2,-0.1,0.4,2.7,2.1,-4.6,-1.7,-2.6,2.6,-2.8)
> x2=c(3.2,0.4,2.0,2.5,0.0,12.7,-5.4,-2.5,1.3,6.8,6.2,7.5,14.6,8.3,0.8,4.3,10.9,13.1,12.8,10.0)
> a=data.frame(G,x1,x2)
> plot(x1,x2)
> text(x1,x2,G,adj=-0.5)
> library(MASS)
> ld=lda(G~x1+x2)
> ld
Call:
lda(G ~ x1 + x2)
Prior probabilities of groups:
1 2
0.5 0.5
Group means:
x1 x2
1 0.92 2.10
2 -0.38 8.85
Coefficients of linear discriminants:
LD1
x1 -0.1035305
x2 0.2247957
> z=predict(ld)
> newG=z$class
> newG
[1] 1 1 1 1 1 2 1 1 1 1 2 2 2 2 1 2 2 2 2 2
Levels: 1 2
> y=cbind(G,z$x,newG)
> y
G LD1 newG
1 1 -0.28674901 1
2 1 -0.39852439 1
3 1 -1.29157053 1
4 1 -1.15846657 1
5 1 -1.95857603 1
6 1 0.94809469 2
7 1 -2.50987753 1
8 1 -0.47066104 1
9 1 -1.06586461 1
10 1 -0.06760842 1
11 2 0.17022402 2
12 2 0.49351760 2
13 2 2.03780185 2
14 2 0.38346871 2
15 2 -1.24038077 1
16 2 0.24005867 2
17 2 1.42347182 2
18 2 2.01119984 2
19 2 1.40540244 2
20 2 1.33503926 2
discriminiant.distance <- function
(TrnX1, TrnX2, TstX = NULL, var.equal = FALSE){
if (is.null(TstX) == TRUE) TstX <- rbind(TrnX1,TrnX2)
if (is.vector(TstX) == TRUE) TstX <- t(as.matrix(TstX))
else if (is.matrix(TstX) != TRUE)
TstX <- as.matrix(TstX)
if (is.matrix(TrnX1) != TRUE) TrnX1 <- as.matrix(TrnX1)
if (is.matrix(TrnX2) != TRUE) TrnX2 <- as.matrix(TrnX2)
nx <- nrow(TstX)
blong <- matrix(rep(0, nx), nrow=1, byrow=TRUE,
dimnames=list("blong", 1:nx))
mu1 <- colMeans(TrnX1); mu2 <- colMeans(TrnX2)
if (var.equal == TRUE|| var.equal == T){
S <- var(rbind(TrnX1,TrnX2))
w <- mahalanobis(TstX, mu2, S)
- mahalanobis(TstX, mu1, S)
}
else{
S1 < -var(TrnX1); S2 <- var(TrnX2)
w <- mahalanobis(TstX, mu2, S2)
- mahalanobis(TstX, mu1, S1)
}
for (i in 1:nx){
if (w[i] > 0)
blong[i] <- 1
else
blong[i] <- 2
}
blong
}
在研究砂基液化问题中,选了七个因子,今从已液化和未液化的地层中分别抽了12个和23个样本,数据列在下表中,其中类表示已液化类,类表示未液化类。试建立距离判别的判别准则,并按判别准则对原35个样本进行回代(即按判别准则进行分类),分析误判情况。
编号 | 类别 | |||||||
---|---|---|---|---|---|---|---|---|
1 | 6.6 | 39 | 1.0 | 6.0 | 6 | 0.12 | 20 | |
2 | 6.6 | 39 | 1.0 | 6.0 | 12 | 0.12 | 20 | |
3 | 6.1 | 47 | 1.0 | 6.0 | 6 | 0.08 | 12 | |
4 | 6.1 | 47 | 1.0 | 6.0 | 12 | 0.08 | 12 | |
5 | 8.4 | 32 | 2.0 | 7.5 | 19 | 0.35 | 75 | |
6 | 7.2 | 6 | 1.0 | 7.0 | 28 | 0.30 | 30 | |
7 | 8.4 | 113 | 3.5 | 6.0 | 18 | 0.15 | 75 | |
8 | 7.5 | 52 | 1.0 | 6.0 | 12 | 0.16 | 40 | |
9 | 7.5 | 52 | 3.5 | 7.5 | 6 | 0.16 | 40 | |
10 | 8.3 | 113 | 0.0 | 7.5 | 35 | 0.12 | 180 | |
12 | 7.8 | 172 | 1.5 | 3.0 | 15 | 0.21 | 45 | |
13 | 8.4 | 32 | 1.0 | 5.0 | 4 | 0.35 | 75 | |
14 | 8.4 | 32 | 2.0 | 9.0 | 10 | 0.35 | 75 | |
15 | 8.4 | 32 | 2.5 | 4.0 | 10 | 0.35 | 75 | |
16 | 6.3 | 11 | 4.5 | 7.5 | 3 | 0.20 | 15 | |
17 | 7.0 | 8 | 4.5 | 4.5 | 9 | 0.25 | 30 | |
18 | 7.0 | 8 | 6.0 | 7.5 | 4 | 0.25 | 30 | |
19 | 7.0 | 8 | 1.5 | 6.0 | 1 | 0.25 | 30 | |
20 | 8.3 | 161 | 1.5 | 4.0 | 4 | 0.08 | 70 | |
21 | 8.3 | 161 | 0.5 | 2.5 | 1 | 0.08 | 70 | |
22 | 7.2 | 6 | 3.5 | 4.0 | 12 | 0.30 | 30 | |
23 | 7.2 | 6 | 1.0 | 3.0 | 3 | 0.30 | 30 | |
24 | 7.2 | 6 | 1.0 | 6.0 | 5 | 0.30 | 30 | |
25 | 5.5 | 6 | 2.5 | 3.0 | 7 | 0.18 | 18 | |
26 | 8.4 | 113 | 3.5 | 4.5 | 6 | 0.15 | 75 | |
27 | 8.4 | 113 | 3.5 | 4.5 | 8 | 0.15 | 75 | |
28 | 7.5 | 52 | 1.0 | 6.0 | 6 | 0.16 | 40 | |
29 | 7.5 | 52 | 1.0 | 7.5 | 8 | 0.16 | 40 | |
30 | 8.3 | 97 | 0.0 | 6.0 | 5 | 0.15 | 180 | |
31 | 8.3 | 97 | 2.5 | 6.0 | 5 | 0.15 | 180 | |
32 | 8.3 | 89 | 0.0 | 6.0 | 10 | 0.16 | 180 | |
33 | 8.3 | 56 | 1.5 | 6.0 | 13 | 0.25 | 180 | |
34 | 7.8 | 172 | 1.0 | 3.5 | 6 | 0.21 | 45 | |
35 | 7.8 | 283 | 1.0 | 4.5 | 6 | 0.18 | 45 |
解:输入数据,调用函数discriminiant.distance()进行判别,分别考虑两总体协方差阵不同的情况
> classX1<-data.frame(
+ x1=c(6.60, 6.60, 6.10, 6.10,8.40,7.2,8.40,7.50,
+ 7.50, 8.30, 7.80, 7.80),
+ x2=c(39.00,39.00, 47.00, 47.00, 32.00,6.0, 113.00, 52.00,
+ 52.00,113.00,172.00,172.00),
+ x3=c(1.00, 1.00, 1.00, 1.00,2.00, 1.0, 3.50, 1.00,
+ 3.50, 0.00, 1.00, 1.50),
+ x4=c(6.00, 6.00, 6.00, 6.00,7.50, 7.0, 6.00, 6.00,
+ 7.50, 7.50, 3.50, 3.00),
+ x5=c(6.00, 12.00,6.00, 12.00, 19.00, 28.0,18.00, 12.00,
+ 6.00, 35.00, 14.00, 15.00),
+ x6=c(0.12,0.12,0.08,0.08,0.35,0.3,0.15,0.16,
+ 0.16,0.12,0.21,0.21),
+ x7=c(20.00,20.00,12.00,12.00,75.00,30.0,75.00,40.00,
+ 40.00,180.00,45.00,45.00)
+ )
>> classX2<-data.frame(
+ x1=c(8.40, 8.40, 8.40, 6.3, 7.00, 7.00, 7.00, 8.30,
+ 8.30, 7.2, 7.2, 7.2, 5.50, 8.40, 8.40, 7.50,
+ 7.50, 8.30, 8.30, 8.30, 8.30, 7.80, 7.80),
+ x2=c(32.0 ,32.00, 32.00, 11.0, 8.00, 8.00, 8.00,161.00,
+ 161.0,6.0,6.0,6.0, 6.00,113.00,113.00, 52.00,
+ 52.00, 97.00, 97.00,89.00,56.00,172.00,283.00),
+ x3=c(1.00, 2.00, 2.50, 4.5, 4.50, 6.00, 1.50, 1.50,
+ 0.50, 3.5, 1.0, 1.0, 2.50, 3.50, 3.50, 1.00,
+ 1.00, 0.00, 2.50, 0.00, 1.50, 1.00, 1.00),
+ x4=c(5.00, 9.00, 4.00, 7.5, 4.50, 7.50, 6.00, 4.00,
+ 2.50, 4.0, 3.0, 6.0, 3.00, 4.50, 4.50, 6.00,
+ 7.50, 6.00, 6.00, 6.00, 6.00, 3.50, 4.50),
+ x5=c(4.00, 10.00, 10.00,3.0, 9.00, 4.00, 1.00, 4.00,
+ 1.00, 12.0,3.0,5.0, 7.00, 6.00, 8.00, 6.00,
+ 8.00, 5.00, 5.00,10.00,13.00, 6.00, 6.00),
+ x6=c(0.35, 0.35, 0.35, 0.2, 0.25, 0.25, 0.25, 0.08,
+ 0.08, 0.30, 0.3, 0.3, 0.18, 0.15, 0.15, 0.16,
+ 0.16, 0.15, 0.15, 0.16, 0.25, 0.21, 0.18),
+ x7=c(75.00,75.00, 75.00, 15.0,30.00, 30.00, 30.00, 70.00,
+ 70.00, 30.0,30.0, 30.0,18.00, 75.00, 75.00, 40.00,
+ 40.00,180.00,180.00,180.00,180.00,45.00,45.00)
+ )
> source("discriminiant.distance.R")
> discriminiant.distance(classX1, classX2, var.equal=TRUE)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27
blong 1 1 1 1 1 1 1 1 2 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
28 29 30 31 32 33 34 35
blong 2 2 2 2 2 2 2 2
在认为两总体协方差阵相同的情况下,将训练样本回代进行判别,有三个点判错,分别是第9号样本、第28号样本和第29号样本。
在认为两总体协方差阵不同的情况下,将训练样本回代进行判别,只有一个点判错,是第9号样本。
下表是某气象站预报有无春旱的实际资料,与是综合预报因子(气象含义略),有春旱的是6个年份的资料,无春旱的是8个年份的资料,它们的先验概率分别用6/14和8/14来估计,并假设误判损失相等。试用Bayes估计对数据进行分析
序号 | 春旱 | 春旱 | 无春旱 | 无春旱 |
---|---|---|---|---|
1 | 24.8 | -2.0 | 22.1 | -0.7 |
2 | 24.1 | -2.4 | 21.6 | -1.4 |
3 | 26.6 | -3.0 | 22.0 | -0.8 |
4 | 23.5 | -1.9 | 22.8 | -1.6 |
5 | 25.5 | -2.1 | 22.7 | -1.5 |
6 | 27.4 | -3.1 | 21.5 | -1.0 |
7 | 22.1 | -1.2 | ||
8 | 21.4 | -1.3 |
解:输入数据(按矩阵形式),再调用函数discriminiant.bayes()
进行判别
> TrnX1<-matrix(
+ c(24.8, 24.1, 26.6, 23.5, 25.5, 27.4,
+ -2.0, -2.4, -3.0, -1.9, -2.1, -3.1),
+ ncol=2)
> TrnX2<-matrix(
+ c(22.1, 21.6, 22.0, 22.8, 22.7, 21.5, 22.1, 21.4,
+ -0.7, -1.4, -0.8, -1.6, -1.5, -1.0, -1.2, -1.3),
+ ncol=2)
> source("discriminiant.bayes.R")
> #### 样本协方差相同
>
> discriminiant.bayes(TrnX1, TrnX2, rate=8/6, var.equal=TRUE)
1 2 3 4 5 6 7 8 9 10 11 12 13 14
blong 1 1 1 2 1 1 2 2 2 2 2 2 2 2
>#第4号样本被错判
> #### 样本协方差不同
>
> discriminiant.bayes(TrnX1, TrnX2, rate=8/6)
1 2 3 4 5 6 7 8 9 10 11 12 13 14
blong 1 1 1 1 1 1 2 2 2 2 2 2 2 2
所有样本回代全部正确
标签:
原文地址:http://www.cnblogs.com/XBlack/p/4878761.html