R 速学之篇二
2016-08-02 11:33
267 查看
地址:http://www.statmethods.net/index.html
二、基本统计:
(一)描述统计:
1、基本
sapply(mydata, mean, na.rm=TRUE) # other functions that can be passed to sapply here include mean, sd, var, min, max, median, range, and quantile.
summary(mydata) # mean,median,25th and 75th quartiles,min,max
fivenum(x) # Tukey min,lower-hinge, median,upper-hinge,max
2、Hmisc
library(Hmisc)
describe(mydata)
# n, nmiss, unique, mean, 5,10,25,50,75,90,95th percentiles
# 5 lowest and 5 highest scores
3、pastecs
library(pastecs)
stat.desc(mydata)
# nbr.val, nbr.null, nbr.na, min max, range, sum,
# median, mean, SE.mean, CI.mean, var, std.dev, coef.var
4、psych
library(psych)
describe(mydata)
# item name ,item number, nvalid, mean, sd,
# median, mad, min, max, skew, kurtosis, se
分组汇总:
library(psych)
# Summary statistics by grouping variable.
# describeBy() is the current name; describe.by() is the deprecated spelling.
# `...` stands for further optional arguments, as in the original note.
describeBy(mydata, group, ...)
5、doBy
library(doBy)
summaryBy(mpg + wt ~ cyl + vs, data = mtcars,
FUN = function(x) { c(m = mean(x), s = sd(x)) } )
# produces mpg.m wt.m mpg.s wt.s for each
# combination of the levels of cyl and vs
2、频数
# 2-Way Frequency Table
attach(mydata)
mytable <- table(A,B) # A will be rows, B will be columns
mytable # print table
margin.table(mytable, 1) # A frequencies (summed over B)
margin.table(mytable, 2) # B frequencies (summed over A)
prop.table(mytable) # cell percentages
prop.table(mytable, 1) # row percentages
prop.table(mytable, 2) # column percentages
# 3-Way Frequency Table
mytable <- table(A, B, C)
ftable(mytable)
newfactor <- factor(oldfactor, exclude=NULL) # table() drops NA by default; rebuilding the factor with exclude=NULL keeps NA as a level so that it is counted
# 3-Way Frequency Table (formula interface)
# R is case-sensitive: the third factor is column `C`, matching table(A, B, C);
# a lowercase `c` does not refer to that column.
mytable <- xtabs(~ A + B + C, data = mydata)
ftable(mytable) # print table
summary(mytable) # chi-square test of independence
交叉表Crosstable,类似于SPSS,可选项很多。
# 2-Way Cross Tabulation
library(gmodels)
CrossTable(mydata$myrowvar, mydata$mycolvar)
3、统计:
#卡方
chisq.test(mytable)
#Fisher Exact Test
fisher.test(x) #x为二维矩阵。
#Mantel-Haenszel test
mantelhaen.test(x) #x为三维矩阵。
# Loglinear Models
# Each model below is fitted with MASS::loglm(), which takes a model formula
# and a contingency table.
library(MASS)
mytable <- xtabs(~ A + B + C, data = mydata)
# Mutual Independence: A, B, and C are pairwise independent
loglm(~ A + B + C, mytable)
# Partial Independence: A is partially independent of B and C
# (i.e., A is independent of the composite variable BC)
# Must be loglm(): stats::loglin() expects a table and a margin list,
# not a formula.
loglm(~ A + B + C + B*C, mytable)
# Conditional Independence: A is independent of B, given C
loglm(~ A + B + C + A*C + B*C, mytable)
# No Three-Way Interaction
loglm(~ A + B + C + A*B + A*C + B*C, mytable)
#The assocstats(mytable) function in the vcd package calculates the phi coefficient, contingency coefficient, and Cramer's V for an rxc table. The Kappa(mytable) function in the vcd package (capital K; base R's kappa() is an unrelated condition-number estimate) calculates Cohen's kappa and weighted kappa for a confusion matrix.
4、相关:
cor(mtcars, use="complete.obs", method="kendall")
cov(mtcars, use="complete.obs")
# rcorr( ) function in the Hmisc
library(Hmisc)
rcorr(x, type="pearson") # type can be pearson or spearman
#mtcars is a data frame
rcorr(as.matrix(mtcars))
# polychoric correlation
# x is a contingency table of counts
library(polycor)
polychor(x)
# heterogeneous correlations in one matrix
# pearson (numeric-numeric),
# polyserial (numeric-ordinal),
# and polychoric (ordinal-ordinal)
# x is a data frame with ordered factors
# and numeric variables
library(polycor)
hetcor(x)
# partial correlations
library(ggm)
data(mydata)
pcor(c("a", "b", "x", "y", "z"), var(mydata))
# partial corr between a and b controlling for x, y, z
5、t检验:
# independent 2-group t-test
t.test(y~x) # where y is numeric and x is a binary factor
# independent 2-group t-test
t.test(y1,y2) # where y1 and y2 are numeric
# paired t-test
t.test(y1,y2,paired=TRUE) # where y1 & y2 are numeric
# one sample t-test
t.test(y,mu=3) # Ho: mu=3
You can use the var.equal = TRUE option to specify equal variances and a pooled variance estimate. You can use the alternative="less" or alternative="greater" option to specify a one tailed test.
6、非参:
# independent 2-group Mann-Whitney U Test
wilcox.test(y~A)
# where y is numeric and A is a binary factor
# independent 2-group Mann-Whitney U Test
wilcox.test(y,x) # where y and x are numeric
# dependent 2-group Wilcoxon Signed Rank Test
wilcox.test(y1,y2,paired=TRUE) # where y1 and y2 are numeric
# Kruskal Wallis Test One Way Anova by Ranks
kruskal.test(y~A) # where y is numeric and A is a factor
# Randomized Block Design - Friedman Test
friedman.test(y~A|B)
# where y are the data values, A is a grouping factor
# and B is a blocking factor
For the wilcox.test you can use the alternative="less" or alternative="greater" option to specify a one tailed test.
二、基本统计:
(一)描述统计:
1、基本
sapply(mydata, mean, na.rm=TRUE) # other functions that can be passed to sapply here include mean, sd, var, min, max, median, range, and quantile.
summary(mydata) # mean,median,25th and 75th quartiles,min,max
fivenum(x) # Tukey min,lower-hinge, median,upper-hinge,max
2、Hmisc
library(Hmisc)
describe(mydata)
# n, nmiss, unique, mean, 5,10,25,50,75,90,95th percentiles
# 5 lowest and 5 highest scores
3、pastecs
library(pastecs)
stat.desc(mydata)
# nbr.val, nbr.null, nbr.na, min max, range, sum,
# median, mean, SE.mean, CI.mean, var, std.dev, coef.var
4、psych
library(psych)
describe(mydata)
# item name ,item number, nvalid, mean, sd,
# median, mad, min, max, skew, kurtosis, se
分组汇总:
library(psych)
# Summary statistics by grouping variable.
# describeBy() is the current name; describe.by() is the deprecated spelling.
# `...` stands for further optional arguments, as in the original note.
describeBy(mydata, group, ...)
5、doBy
library(doBy)
summaryBy(mpg + wt ~ cyl + vs, data = mtcars,
FUN = function(x) { c(m = mean(x), s = sd(x)) } )
# produces mpg.m wt.m mpg.s wt.s for each
# combination of the levels of cyl and vs
2、频数
# 2-Way Frequency Table
attach(mydata)
mytable <- table(A,B) # A will be rows, B will be columns
mytable # print table
margin.table(mytable, 1) # A frequencies (summed over B)
margin.table(mytable, 2) # B frequencies (summed over A)
prop.table(mytable) # cell percentages
prop.table(mytable, 1) # row percentages
prop.table(mytable, 2) # column percentages
# 3-Way Frequency Table
mytable <- table(A, B, C)
ftable(mytable)
newfactor <- factor(oldfactor, exclude=NULL) # table() drops NA by default; rebuilding the factor with exclude=NULL keeps NA as a level so that it is counted
# 3-Way Frequency Table (formula interface)
# R is case-sensitive: the third factor is column `C`, matching table(A, B, C);
# a lowercase `c` does not refer to that column.
mytable <- xtabs(~ A + B + C, data = mydata)
ftable(mytable) # print table
summary(mytable) # chi-square test of independence
交叉表Crosstable,类似于SPSS,可选项很多。
# 2-Way Cross Tabulation
library(gmodels)
CrossTable(mydata$myrowvar, mydata$mycolvar)
3、统计:
#卡方
chisq.test(mytable)
#Fisher Exact Test
fisher.test(x) #x为二维矩阵。
#Mantel-Haenszel test
mantelhaen.test(x) #x为三维矩阵。
# Loglinear Models
# Each model below is fitted with MASS::loglm(), which takes a model formula
# and a contingency table.
library(MASS)
mytable <- xtabs(~ A + B + C, data = mydata)
# Mutual Independence: A, B, and C are pairwise independent
loglm(~ A + B + C, mytable)
# Partial Independence: A is partially independent of B and C
# (i.e., A is independent of the composite variable BC)
# Must be loglm(): stats::loglin() expects a table and a margin list,
# not a formula.
loglm(~ A + B + C + B*C, mytable)
# Conditional Independence: A is independent of B, given C
loglm(~ A + B + C + A*C + B*C, mytable)
# No Three-Way Interaction
loglm(~ A + B + C + A*B + A*C + B*C, mytable)
#The assocstats(mytable) function in the vcd package calculates the phi coefficient, contingency coefficient, and Cramer's V for an rxc table. The Kappa(mytable) function in the vcd package (capital K; base R's kappa() is an unrelated condition-number estimate) calculates Cohen's kappa and weighted kappa for a confusion matrix.
4、相关:
cor(mtcars, use="complete.obs", method="kendall")
cov(mtcars, use="complete.obs")
# rcorr( ) function in the Hmisc
library(Hmisc)
rcorr(x, type="pearson") # type can be pearson or spearman
#mtcars is a data frame
rcorr(as.matrix(mtcars))
# polychoric correlation
# x is a contingency table of counts
library(polycor)
polychor(x)
# heterogeneous correlations in one matrix
# pearson (numeric-numeric),
# polyserial (numeric-ordinal),
# and polychoric (ordinal-ordinal)
# x is a data frame with ordered factors
# and numeric variables
library(polycor)
hetcor(x)
# partial correlations
library(ggm)
data(mydata)
pcor(c("a", "b", "x", "y", "z"), var(mydata))
# partial corr between a and b controlling for x, y, z
5、t检验:
# independent 2-group t-test
t.test(y~x) # where y is numeric and x is a binary factor
# independent 2-group t-test
t.test(y1,y2) # where y1 and y2 are numeric
# paired t-test
t.test(y1,y2,paired=TRUE) # where y1 & y2 are numeric
# one sample t-test
t.test(y,mu=3) # Ho: mu=3
You can use the var.equal = TRUE option to specify equal variances and a pooled variance estimate. You can use the alternative="less" or alternative="greater" option to specify a one tailed test.
6、非参:
# independent 2-group Mann-Whitney U Test
wilcox.test(y~A)
# where y is numeric and A is a binary factor
# independent 2-group Mann-Whitney U Test
wilcox.test(y,x) # where y and x are numeric
# dependent 2-group Wilcoxon Signed Rank Test
wilcox.test(y1,y2,paired=TRUE) # where y1 and y2 are numeric
# Kruskal Wallis Test One Way Anova by Ranks
kruskal.test(y~A) # where y is numeric and A is a factor
# Randomized Block Design - Friedman Test
friedman.test(y~A|B)
# where y are the data values, A is a grouping factor
# and B is a blocking factor
For the wilcox.test you can use the alternative="less" or alternative="greater" option to specify a one tailed test.
相关文章推荐
- 数据分析的3大作用:解决生活问题、降低被误导概率、职场发展需要
- 向大家推荐一个收集整理正则表达式的网站
- 最严谨的校验email地址的正则表达式及各种语言对应版
- PHP中的print_r 与 var_dump 输出数组
- 百万级别知乎用户数据抓取与分析之PHP开发
- Python运用于数据分析的简单教程
- JAVA中正则表达式匹配,替换,查找,切割的方法
- iOS中使用正则表达式NSRegularExpression 来验证textfiled输入的内容
- 在Python中使用NLTK库实现对词干的提取的教程
- 详解BI/数据分析/数据挖掘/业务分析概念
- 处理Plot rendering error
- 数据分析狂想——“亲~我的东西什么时候到?”
- 『Data Science』R语言学习笔记,基础语法
- 『Data Science』R语言学习笔记,使用Swirl包学习R
- 『Data Science』R语言学习笔记,获取数据
- 『Data Science』R语言学习笔记,观察数据
- R语言书籍的学习路线图
- 就国内某个程序员问答网站的简单的分析
- R语言学习笔记 三