R 笔记
2017-02-08 12:12
501 查看
begin note
调用命令:r CMD BATCH D:\RWORKSPACE\CMD_TEST.R (注意 CMD BATCH 都要大写)
ls(): 列出所有的变量名称
ls(pattern ='v'): 根据pattern匹配
rm('xxx') 删除变量
rm(list=ls()) 删除所有的变量
> ls()
character(0)
集合转数组:
> vector1 <- c(5,9,3)
> vector2 <- c(10,11,12,13,14,15)
> column.names <- c("COL1","COL2","COL3")
> row.names <- c("ROW1","ROW2","ROW3")
> matrix.names <- c("Matrix1","Matrix2")
> result <- array(c(vector1,vector2),dim=c(3,3,2),dimnames = list(row.names,column.names,matrix.names))
> print(result)
, , Matrix1
COL1 COL2 COL3
ROW1 5 10 13
ROW2 9 11 14
ROW3 3 12 15
, , Matrix2
COL1 COL2 COL3
ROW1 5 10 13
ROW2 9 11 14
ROW3 3 12 15
> # Print the third row of the second matrix of the array.
> print(result[3,,2])
COL1 COL2 COL3
3 12 15
>
> # Print the element in the 1st row and 3rd column of the 1st matrix.
> print(result[1,3,1])
[1] 13
>
> # Print the 2nd Matrix.
> print(result[,,2])
COL1 COL2 COL3
ROW1 5 10 13
ROW2 9 11 14
ROW3 3 12 15
数组的操作:
# Add the second slices of two 3x3x2 arrays.
# array() recycles its input until every cell of the requested shape is filled.
vector1 <- c(5, 9, 3)
vector2 <- c(10, 11, 12, 13, 14, 15)
array1 <- array(c(vector1, vector2), dim = c(3, 3, 2))

# The second array is built from its own input vectors.
vector3 <- c(9, 1, 0)
vector4 <- c(6, 0, 11, 3, 14, 1, 2, 6, 9)
array2 <- array(c(vector3, vector4), dim = c(3, 3, 2))

# Take the second 3x3 slice of each array as a plain matrix.
matrix1 <- array1[, , 2]
matrix2 <- array2[, , 2]

# Element-wise addition of the two matrices.
result <- matrix1 + matrix2
print(result)
# Sum every row of a 3x3x2 array across all columns and both matrices.
vector1 <- c(5, 9, 3)
vector2 <- c(10, 11, 12, 13, 14, 15)

# Nine input values are recycled to fill the 18 cells of the array.
new.array <- array(c(vector1, vector2), dim = c(3, 3, 2))
print(new.array)

# MARGIN = 1 keeps the first dimension (rows) and collapses the others.
result <- apply(new.array, 1, sum)
print(result)
# Turn a character vector into a factor and check the type before and after.
data <- c("East", "West", "East", "North", "North", "East", "West", "West",
          "West", "East", "North")
print(data)
print(is.factor(data))  # FALSE: still a plain character vector

# factor() collects the distinct values as levels.
factor_data <- factor(data)
print(factor_data)
print(is.factor(factor_data))  # TRUE
# Show that a text column of a data frame can be stored as a factor.
height <- c(132, 151, 162, 139, 166, 147, 122)
weight <- c(48, 49, 66, 53, 67, 52, 40)
gender <- c("male", "male", "female", "female", "male", "female", "male")

# Since R 4.0 character columns stay character by default, so the conversion
# to factor is requested explicitly to make the check below hold.
input_data <- data.frame(height, weight, gender, stringsAsFactors = TRUE)
print(input_data)

# The gender column (a column, not a row) is a factor.
print(is.factor(input_data$gender))

# Print the gender column to see its levels.
print(input_data$gender)
# Change the order of a factor's levels.
data <- c("East", "West", "East", "North", "North", "East", "West", "West",
          "West", "East", "North")

# Default level order is the sorted order of the distinct values.
factor_data <- factor(data)
print(factor_data)

# Re-create the factor with an explicit level order; the values are unchanged.
new_order_data <- factor(factor_data, levels = c("East", "West", "North"))
print(new_order_data)
gl(n, k, labels)
以下是所使用的参数的说明:
n 是一个整数来给出级别数
k 是一个整数给出重复的数量
labels 为所得到的因子级别标签的向量。
示例
# Generate a factor with 3 levels, each repeated 4 times in a row.
v <- gl(3, 4, labels = c("Tampa", "Seattle", "Boston"))
print(v)
创建数据帧
# Create an employee data frame, inspect it, and extract or extend parts of it.
emp.data <- data.frame(
  emp_id = c(1:5),
  emp_name = c("Rick", "Dan", "Michelle", "Ryan", "Gary"),
  salary = c(623.3, 515.2, 611.0, 729.0, 843.25),
  start_date = as.Date(c("2012-01-01", "2013-09-23", "2014-11-15",
                         "2014-05-11", "2015-03-27")),
  stringsAsFactors = FALSE
)
print(emp.data)

# Structure (column types) and per-column summary statistics.
str(emp.data)
print(summary(emp.data))

# Extract specific columns into a new data frame.
result <- data.frame(emp.data$emp_name, emp.data$salary)
print(result)

# Extract the first two rows with all columns.
result <- emp.data[1:2, ]
print(result)

# Extract rows 3 and 5, columns 2 and 4.
result <- emp.data[c(3, 5), c(2, 4)]
print(result)

# Add a "dept" column; the vector needs one value per existing row.
emp.data$dept <- c("IT", "Operations", "IT", "HR", "Finance")
v <- emp.data
print(v)
# Append new rows to a data frame with rbind().
# Both frames must have the same column names for the rows to line up.
emp.data <- data.frame(
  emp_id = c(1:5),
  emp_name = c("Rick", "Dan", "Michelle", "Ryan", "Gary"),
  salary = c(623.3, 515.2, 611.0, 729.0, 843.25),
  start_date = as.Date(c("2012-01-01", "2013-09-23", "2014-11-15",
                         "2014-05-11", "2015-03-27")),
  dept = c("IT", "Operations", "IT", "HR", "Finance"),
  stringsAsFactors = FALSE
)

# The second data frame holds the records to be added.
emp.newdata <- data.frame(
  emp_id = c(6:8),
  emp_name = c("Rasmi", "Pranab", "Tusar"),
  salary = c(578.0, 722.5, 632.8),
  start_date = as.Date(c("2013-05-21", "2013-07-30", "2014-06-17")),
  # Spelled like the existing "Finance" value so both count as one department.
  dept = c("IT", "Operations", "Finance"),
  stringsAsFactors = FALSE
)

# Bind the two data frames row-wise.
emp.finaldata <- rbind(emp.data, emp.newdata)
print(emp.finaldata)
# Install a package from a local archive instead of a repository.
# General form (file_name_with_path is a placeholder, not a real variable):
#   install.packages(file_name_with_path, repos = NULL, type = "source")

# Install the package named "XML" from a local file.
# NOTE(review): a .zip archive is normally a Windows binary build; confirm
# that type = "source" is what is intended here.
install.packages("E:/XML_3.98-1.3.zip", repos = NULL, type = "source")
# Combine vectors column-wise with cbind(), then stack data frames with rbind().
city <- c("Tampa", "Seattle", "Hartford", "Denver")
state <- c("FL", "WA", "CT", "CO")
# Zip codes are identifiers, not numbers: keeping them as strings preserves
# the leading zero of "06161".
zipcode <- c("33602", "98104", "06161", "80294")

# cbind() alone yields a character matrix; wrapping it in data.frame() makes
# the result an actual data frame with columns city, state and zipcode.
addresses <- data.frame(cbind(city, state, zipcode), stringsAsFactors = FALSE)

cat("# # # # The First data frame\n")
print(addresses)

# Create another data frame with the same columns.
new.address <- data.frame(
  city = c("Lowry", "Charlotte"),
  state = c("CO", "FL"),
  zipcode = c("80230", "33949"),
  stringsAsFactors = FALSE
)

cat("# # # The Second data frame\n")
print(new.address)

# rbind() appends the rows of the second data frame to the first.
all.addresses <- rbind(addresses, new.address)

cat("# # # The combined data frame\n")
print(all.addresses)
melt and cast
熔化和转换
R语言编程中最有趣的地方之一,是通过多个步骤改变数据的形状,从而得到所希望的形状。用来实现这种变换的函数是 melt() 和 cast()。下面使用 "MASS" 库中名为 ships 的数据集。
# Load MASS, which provides the ships data set, and print it.
library(MASS)
print(ships)
当我们上面的代码执行时,它产生以下结果:
type year period service incidents 1 A 60 60 127 0 2 A 60 75 63 0 3 A 65 60 1095 3 4 A 65 75 1095 4 5 A 70 60 1512 6 ............. ............. 8 A 75 75 2244 11 9 B 60 60 44882 39 10 B 60 75 17176 29 11 B 65 60 28609 58 ............ ............ 17 C 60 60 1179 1 18 C 60 75 552 1 19 C 65 60 781 0 ............ ............
融化数据
现在我们融化数据:以 type 和 year 作为标识列,其余所有列都被转换成多行。(melt() 和 cast() 来自 reshape 包,使用前需要先执行 library(reshape)。)
molten.ships <- melt(ships, id = c("type","year"))
print(molten.ships)
当我们上面的代码执行时,它产生以下结果:
type year variable value 1 A 60 period 60 2 A 60 period 75 3 A 65 period 60 4 A 65 period 75 ............ ............ 9 B 60 period 60 10 B 60 period 75 11 B 65 period 60 12 B 65 period 75 13 B 70 period 60 ........... ........... 41 A 60 service 127 42 A 60 service 63 43 A 65 service 1095 ........... ........... 70 D 70 service 1208 71 D 75 service 0 72 D 75 service 2051 73 E 60 service 45 74 E 60 service 0 75 E 65 service 789 ........... ........... 101 C 70 incidents 6 102 C 70 incidents 2 103 C 75 incidents 0 104 C 75 incidents 1 105 D 60 incidents 0 106 D 60 incidents 0 ........... ...........
转换数据
我们可以把融化后的数据转换成新的形式,按每种类型的 ships、每个年份进行汇总。这是通过 cast() 函数完成的。
recasted.ship <- cast(molten.ships, type+year~variable,sum)
print(recasted.ship)
当我们上面的代码执行时,它产生以下结果:
type year period service incidents 1 A 60 135 190 0 2 A 65 135 2190 7 3 A 70 135 4865 24 4 A 75 135 2244 11 5 B 60 135 62058 68 6 B 65 135 48979 111 7 B 70 135 20163 56 8 B 75 135 7117 18 9 C 60 135 1731 2 10 C 65 135 1457 1 11 C 70 135 2731 8 12 C 75 135 274 1 13 D 60 135 356 0 14 D 65 135 480 0 15 D 70 135 1557 13 16 D 75 135 2051 4 17 E 60 135 45 0 18 E 65 135 1226 14 19 E 70 135 3318 17 20 E 75 135 542 1
读一个CSV文件
以下是 read.csv() 函数的一个简单例子,它读取当前工作目录下的 CSV 文件:
data <- read.csv("input.csv")
print(data)
# Read the CSV file and inspect its shape.
data <- read.csv("input.csv")
print(is.data.frame(data))
print(ncol(data))
print(nrow(data))

# Get the full record(s) of the person with the highest salary.
retval <- subset(data, salary == max(salary))
print(retval)
# Read the CSV file into a data frame.
data <- read.csv("input.csv")

# Keep employees of the IT department whose salary is above 600.
info <- subset(data, salary > 600 & dept == "IT")
print(info)
# Read the CSV file into a data frame.
data <- read.csv("input.csv")

# Keep the rows whose start date is after 2014-01-01; start_date is
# converted with as.Date() so the comparison is done on dates.
retval <- subset(data, as.Date(start_date) > as.Date("2014-01-01"))
print(retval)
# Load the packages required to read XML files.
library("XML")
library("methods")

# Convert the contents of the input XML file to a data frame.
xmldataframe <- xmlToDataFrame("input.xml")
print(xmldataframe)
# Load the package required to read JSON files.
library("rjson")

# Parse the JSON file named by the file argument.
result <- fromJSON(file = "input.json")

# Print the parsed result.
print(result)
# Load the package required to read JSON files.
library("rjson")

# Parse the JSON file named by the file argument.
result <- fromJSON(file = "input.json")

# Convert the parsed JSON to a data frame.
json_data_frame <- as.data.frame(result)
print(json_data_frame)
# Query a MySQL database from R.
# MySQL() is provided by the RMySQL package, so load it first.
library("RMySQL")

# Create a connection object to the MySQL database.
# This connects to the sample database named "sakila".
mysqlconnection <- dbConnect(MySQL(), user = 'root', password = '',
                             dbname = 'sakila', host = 'localhost')

# List the tables available in this database.
dbListTables(mysqlconnection)

# Send the SQL query for all rows of the "actor" table.
result <- dbSendQuery(mysqlconnection, "select * from actor")

# Store the first 5 rows in an R data frame (n = 5 limits the fetch).
# NOTE(review): this variable shares its name with base data.frame(); the
# name is kept so existing references to it keep working.
data.frame <- fetch(result, n = 5)
print(data.frame)

# Release the pending result set and close the connection.
dbClearResult(result)
dbDisconnect(mysqlconnection)
# Return the most frequent element (the mode) of a vector.
#
# v: the vector to examine.
# Returns the value of v that occurs most often. When several values tie,
# the one that appears first in v is returned, because which.max() reports
# the first maximum.
getmode <- function(v) {
  uniqv <- unique(v)
  # match() maps each element to the index of its distinct value and
  # tabulate() counts how often each index occurs.
  uniqv[which.max(tabulate(match(v, uniqv)))]
}

# Create the vector with numbers.
v <- c(2, 1, 2, 3, 1, 2, 3, 4, 1, 5, 5, 3, 2, 3)

# Calculate the mode using the user-defined function.
result <- getmode(v)
print(result)
K线图:
# Candlestick (K-line) chart of the ^SSEC series, aggregated by month.
library(quantmod)

# Download quotes from 2015-1-1 up to today using the "yahoo" source.
# The next statement relies on this call having created an object named SSEC.
sse <- getSymbols("^SSEC", src = "yahoo", from = "2015-1-1", to = Sys.Date())

# Aggregate to monthly bars and show the most recent ones.
SSEC.m <- to.monthly(SSEC)
tail(SSEC.m)

# Draw the chart with the white theme.
candleChart(SSEC.m, theme = "white")
相关文章推荐
- 向大家推荐一个收集整理正则表达式的网站
- 最严谨的校验email地址的正则表达式及各种语言对应版
- PHP中的print_r 与 var_dump 输出数组
- JAVA中正则表达式匹配,替换,查找,切割的方法
- iOS中使用正则表达式NSRegularExpression 来验证textfiled输入的内容
- 处理Plot rendering error
- 『Data Science』R语言学习笔记,基础语法
- 『Data Science』R语言学习笔记,使用Swirl包学习R
- 『Data Science』R语言学习笔记,获取数据
- 『Data Science』R语言学习笔记,观察数据
- R语言书籍的学习路线图
- R语言学习笔记 三
- R语言学习笔记 四
- R语言学习笔记 五
- 验证码识别技术 一
- PHP中的print_r 与 var_dump 输出数组
- 20120919_01
- 20120919_02
- 20120919_3
- Review JDBC