您的位置：首页 > 其它

R 速学之篇二

2016-08-02 11:33 267 查看

地址：http://www.statmethods.net/index.html

二、基本统计：

（一）描述统计：

1、基本

sapply(mydata, mean, na.rm=TRUE) # functions that can be used with sapply include mean, sd, var, min, max, median, range, and quantile

summary(mydata) # mean, median, 25th and 75th percentiles (1st and 3rd quartiles), min, max

fivenum(x) # Tukey min,lower-hinge, median,upper-hinge,max

2、Hmisc

library(Hmisc)

describe(mydata)

# n, nmiss, unique, mean, 5,10,25,50,75,90,95th percentiles

# 5 lowest and 5 highest scores

3、pastecs

library(pastecs)

stat.desc(mydata)

# nbr.val, nbr.null, nbr.na, min, max, range, sum,

# median, mean, SE.mean, CI.mean, var, std.dev, coef.var

4、psych

library(psych)

describe(mydata)

# item name ,item number, nvalid, mean, sd,

# median, mad, min, max, skew, kurtosis, se

分组汇总：

library(psych)

describeBy(mydata, group,...) # describe.by() 已弃用，新版 psych 请使用 describeBy()

5、doBy

library(doBy)

summaryBy(mpg + wt ~ cyl + vs, data = mtcars,

FUN = function(x) { c(m = mean(x), s = sd(x)) } )

# produces mpg.m wt.m mpg.s wt.s for each

# combination of the levels of cyl and vs

2、频数

# 2-Way Frequency Table

attach(mydata)

mytable <- table(A,B) # A will be rows, B will be columns

mytable # print table

margin.table(mytable, 1) # A frequencies (summed over B)

margin.table(mytable, 2) # B frequencies (summed over A)

prop.table(mytable) # cell percentages

prop.table(mytable, 1) # row percentages

prop.table(mytable, 2) # column percentages

# 3-Way Frequency Table

mytable <- table(A, B, C)

ftable(mytable)

newfactor <- factor(oldfactor, exclude=NULL) # table() 默认忽略缺失值；若要把 NA 作为一个类别计数，需用 exclude=NULL 重建因子（向量可用 table(x, useNA="ifany")）

# 3-Way Frequency Table

mytable <- xtabs(~A+B+C, data=mydata)

ftable(mytable) # print table

summary(mytable) # chi-square test of independence

交叉表Crosstable，类似于SPSS，可选项很多。

# 2-Way Cross Tabulation

library(gmodels)

CrossTable(mydata$myrowvar, mydata$mycolvar)

3、统计：

#卡方

chisq.test(mytable)

#Fisher Exact Test

fisher.test(x) #x为二维矩阵。

#Mantel-Haenszel test

mantelhaen.test(x) #x为三维矩阵。

#Loglinear Models

library(MASS)

mytable <- xtabs(~A+B+C, data=mydata)

#Mutual Independence: A, B, and C are pairwise independent

loglm(~A+B+C, mytable)

#Partial Independence: A is partially independent of B and C (i.e., A is independent of the composite variable BC)

loglm(~A+B+C+B*C, mytable)

#Conditional Independence: A is independent of B, given C

loglm(~A+B+C+A*C+B*C, mytable)

#No Three-Way Interaction

loglm(~A+B+C+A*B+A*C+B*C, mytable)

#The assocstats(mytable) function in the vcd package calculates the phi coefficient, contingency coefficient, and Cramer's V for an r x c table. The Kappa(mytable) function in the vcd package (capital K; base R's kappa() computes a matrix condition number instead) calculates Cohen's kappa and weighted kappa for a confusion matrix.

4、相关：

cor(mtcars, use="complete.obs", method="kendall")

cov(mtcars, use="complete.obs")

# rcorr( ) function in the Hmisc

library(Hmisc)

rcorr(x, type="pearson") # type can be pearson or spearman

#mtcars is a data frame

rcorr(as.matrix(mtcars))

# polychoric correlation

# x is a contingency table of counts

library(polycor)

polychor(x)

# heterogeneous correlations in one matrix

# pearson (numeric-numeric),

# polyserial (numeric-ordinal),

# and polychoric (ordinal-ordinal)

# x is a data frame with ordered factors

# and numeric variables

library(polycor)

hetcor(x)

# partial correlations

library(ggm)

data(mydata)

pcor(c("a", "b", "x", "y", "z"), var(mydata))

# partial corr between a and b controlling for x, y, z

5、t检验：

# independent 2-group t-test

t.test(y~x) # where y is numeric and x is a binary factor

# independent 2-group t-test

t.test(y1,y2) # where y1 and y2 are numeric

# paired t-test

t.test(y1,y2,paired=TRUE) # where y1 & y2 are numeric

# one sample t-test

t.test(y,mu=3) # Ho: mu=3

You can use the var.equal = TRUE option to specify equal variances and a pooled variance estimate. You can use the alternative="less" or alternative="greater" option to specify a one tailed test.

6、非参：

# independent 2-group Mann-Whitney U Test

wilcox.test(y~A)

# where y is numeric and A is a binary factor

# independent 2-group Mann-Whitney U Test

wilcox.test(y,x) # where y and x are numeric

# dependent 2-group Wilcoxon Signed Rank Test

wilcox.test(y1,y2,paired=TRUE) # where y1 and y2 are numeric

# Kruskal Wallis Test One Way Anova by Ranks

kruskal.test(y~A) # where y is numeric and A is a factor

# Randomized Block Design - Friedman Test

friedman.test(y~A|B)

# where y are the data values, A is a grouping factor

# and B is a blocking factor

For the wilcox.test you can use the alternative="less" or alternative="greater" option to specify a one tailed test.

内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理

标签： R 数据分析

相关文章推荐

新的分享

章节导航