您的位置:首页 > 其它

R 速学之篇二

2016-08-02 11:33 267 查看
地址:http://www.statmethods.net/index.html

二、基本统计:

(一)描述统计:

1、基本

sapply(mydata, mean, na.rm=TRUE) # functions that can be used with sapply include mean, sd, var, min, max, median, range, and quantile.

summary(mydata) # mean,median,25th and 75th quartiles,min,max

fivenum(x) # Tukey min,lower-hinge, median,upper-hinge,max

2、Hmisc

library(Hmisc)

describe(mydata) 

# n, nmiss, unique, mean, 5,10,25,50,75,90,95th percentiles 

# 5 lowest and 5 highest scores

3、pastecs

library(pastecs)

stat.desc(mydata) 

# nbr.val, nbr.null, nbr.na, min, max, range, sum, 

# median, mean, SE.mean, CI.mean, var, std.dev, coef.var

4、psych

library(psych)

describe(mydata)

# item name ,item number, nvalid, mean, sd, 

# median, mad, min, max, skew, kurtosis, se

分组汇总:

library(psych)

describeBy(mydata, group,...)

5、doBy

library(doBy)

summaryBy(mpg + wt ~ cyl + vs, data = mtcars, 

  FUN = function(x) { c(m = mean(x), s = sd(x)) } )

# produces mpg.m wt.m mpg.s wt.s for each 

# combination of the levels of cyl and vs

2、频数

# 2-Way Frequency Table 

attach(mydata)

mytable <- table(A,B) # A will be rows, B will be columns 

mytable # print table 

margin.table(mytable, 1) # A frequencies (summed over B) 

margin.table(mytable, 2) # B frequencies (summed over A)

prop.table(mytable) # cell percentages

prop.table(mytable, 1) # row percentages 

prop.table(mytable, 2) # column percentages

# 3-Way Frequency Table 

mytable <- table(A, B, C) 

ftable(mytable)

newfactor <- factor(oldfactor, exclude=NULL) #table默认忽略缺失值;若要把NA作为一个类别计数,需用exclude=NULL重新生成因子

# 3-Way Frequency Table

mytable <- xtabs(~A+B+C, data=mydata)

ftable(mytable) # print table 

summary(mytable) # chi-square test of independence

交叉表Crosstable,类似于SPSS,可选项很多。

# 2-Way Cross Tabulation

library(gmodels)

CrossTable(mydata$myrowvar, mydata$mycolvar)

3、统计:

#卡方

chisq.test(mytable)

#Fisher Exact Test

fisher.test(x) #x为二维矩阵。

#Mantel-Haenszel test

mantelhaen.test(x)  #x为三维矩阵。

#Loglinear Models

library(MASS)

mytable <- xtabs(~A+B+C, data=mydata)

#Mutual Independence: A, B, and C are mutually independent

loglm(~A+B+C, mytable)

#Partial Independence: A is partially independent of B and C (i.e., A is independent of the composite variable BC)

loglm(~A+B+C+B*C, mytable)

#Conditional Independence: A is independent of B, given C

loglm(~A+B+C+A*C+B*C, mytable)

#No Three-Way Interaction

loglm(~A+B+C+A*B+A*C+B*C, mytable)

#The assocstats(mytable) function in the vcd package calculates the phi coefficient, contingency coefficient, and Cramer's V for an r x c table. The Kappa(mytable) function in the vcd package calculates Cohen's kappa and weighted kappa for a confusion matrix.

4、相关:

cor(mtcars, use="complete.obs", method="kendall") 

cov(mtcars, use="complete.obs")

# rcorr( ) function in the Hmisc

library(Hmisc)

rcorr(x, type="pearson") # type can be pearson or spearman

#mtcars is a data frame 

rcorr(as.matrix(mtcars))

# polychoric correlation

# x is a contingency table of counts

library(polycor)

polychor(x) 

# heterogeneous correlations in one matrix 

# pearson (numeric-numeric), 

# polyserial (numeric-ordinal), 

# and polychoric (ordinal-ordinal)

# x is a data frame with ordered factors 

# and numeric variables

library(polycor)

hetcor(x) 

# partial correlations

library(ggm)

data(mydata)

pcor(c("a", "b", "x", "y", "z"), var(mydata))

# partial corr between a and b controlling for x, y, z

5、t检验:

# independent 2-group t-test

t.test(y~x) # where y is numeric and x is a binary factor

# independent 2-group t-test

t.test(y1,y2) # where y1 and y2 are numeric

# paired t-test

t.test(y1,y2,paired=TRUE) # where y1 & y2 are numeric

# one sample t-test

t.test(y,mu=3) # Ho: mu=3

You can use the var.equal = TRUE option to specify equal variances and a pooled variance estimate. You can use the alternative="less" or alternative="greater" option to specify a one tailed test.

6、非参:

# independent 2-group Mann-Whitney U Test 

wilcox.test(y~A) 

# where y is numeric and A is a binary factor

# independent 2-group Mann-Whitney U Test

wilcox.test(y,x) # where y and x are numeric

# dependent 2-group Wilcoxon Signed Rank Test 

wilcox.test(y1,y2,paired=TRUE) # where y1 and y2 are numeric

# Kruskal Wallis Test One Way Anova by Ranks 

kruskal.test(y~A) # where y is numeric and A is a factor

# Randomized Block Design - Friedman Test 

friedman.test(y~A|B)

# where y are the data values, A is a grouping factor

# and B is a blocking factor

For the wilcox.test you can use the alternative="less" or alternative="greater" option to specify a one tailed test.
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签:  R 数据分析