标签:style class tor prot from 数据集 group 双引号 新版本
sqldf程序包是R语言中实用的数据管理辅助工具,但最新版本的包在处理中文时出现乱码,待解决
Usage: sqldf(x, stringsAsFactors = FALSE, row.names = FALSE...)
library(gsubfn)
library(proto)
library(RSQLite)
library(DBI)
library(tcltk)
library(sqldf)
> x <-head(iris,2)
> y <- sqldf("select * from iris limit 2")
> #比较两个数据框是否相同
> identical(x,y)
[1] TRUE
> y
Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1 5.1 3.5 1.4 0.2 setosa
2 4.9 3.0 1.4 0.2 setosa
> #取出物种列中以se开头的数据子集
> a2r <- subset(iris, grepl("^se", Species))
> a2s <- sqldf("select * from iris where Species like ‘se%‘")
> all.equal(as.data.frame(a2r), a2s)
[1] TRUE
in 提取子集
> #在iris数据集中,选出量物种是setosa和virginica的行
> a3r <- subset(iris, Species %in% c("setosa", "virginica"))
> a3s <- sqldf("select * from iris where Species in (‘setosa‘, ‘virginica‘)")#注意单引号和双引号
>
> #a3r选的是子集,因而行名还是与原数据集相同
> row.names(a3r) <- NULL
> identical(a3r, a3s)
[1] TRUE
数据合计
> head(iris,2) Sepal.Length Sepal.Width Petal.Length Petal.Width Species 1 5.1 3.5 1.4 0.2 setosa 2 4.9 3.0 1.4 0.2 setosa > aggregate(iris[1:2], iris[5], mean) #计算物种前两个变量的平均值 Species Sepal.Length Sepal.Width 1 setosa 5.006 3.428 2 versicolor 5.936 2.770 3 virginica 6.588 2.974 > sqldf(‘select Species, avg("Sepal.Length") , avg("Sepal.Width") from iris group by Species‘) Species avg("Sepal.Length") avg("Sepal.Width") 1 setosa 5.006 3.428 2 versicolor 5.936 2.770 3 virginica 6.588 2.974
order by
> head(warpbreaks,2) breaks wool tension 1 26 A L 2 30 A L > head(warpbreaks[order(warpbreaks$breaks, decreasing = TRUE), ], 2) breaks wool tension 5 70 A L 9 67 A L > sqldf("select * from warpbreaks order by breaks desc limit 2") breaks wool tension 1 70 A L 2 67 A L
标签:style class tor prot from 数据集 group 双引号 新版本
原文地址:http://www.cnblogs.com/tgzhu/p/6746804.html