『Data Science』R语言学习笔记,基础语法
|
Multiple objects can be deparsed using the `dump` function and read back in using `source`.
> x <- "foo" ## create the first data object
> y <- data.frame(a = 1,b = "a") ## create the second data object
> dump(c("x","y"),file = "data.R") ## store both data objects in a file called 'data.R'
> rm(x,y) ## remove both data objects from the workspace
> source("data.R") ## read the dumped file 'data.R' back in
> y ## print the data object 'y' from 'data.R'
a b
1 1 a
> x ## print the data object 'x' from 'data.R'
[1] "foo"
Connections: Interfaces to the Outside World
Data are read in using connection interfaces. Connections can be made to files (most common) or to other, more exotic things.
> con <- file('db.txt','r')
> readLines(con)
Subsetting
Basic
> x <- c("a","b","c","d","e")
> x[1]
[1] "a"
> x[2]
[1] "b"
> x[1:3]
[1] "a" "b" "c"
> x[x > "a"]
[1] "b" "c" "d" "e"
> u <- x>"a"
> u
[1] FALSE TRUE TRUE TRUE TRUE
> x[u]
[1] "b" "c" "d" "e"
Lists
> x <- list(foo = 1:4,bar = 0.6)
> x[1]
$foo
[1] 1 2 3 4
> x[[1]]
[1] 1 2 3 4
> x[[2]]
[1] 0.6
> x$bar
[1] 0.6
> x$foo
[1] 1 2 3 4
> x[["bar"]]
[1] 0.6
> x["bar"]
$bar
[1] 0.6
> x <- list(foo = 1:4,bar = 0.6,baz = "hello")
> x[c(1,3)]
$foo
[1] 1 2 3 4
$baz
[1] "hello"
> name <- "foo"
> x[[name]]
[1] 1 2 3 4
> x$name ## `$` takes the name literally: it looks for an element called "name", which the list `x` does not have, instead of using the value of the variable `name`.
NULL
> x$foo
[1] 1 2 3 4
Matrices
Matrices can be subsetted in the usual way with (i,j) type indices.
> x <- matrix(1:6,2,3)
> x[1,2]
[1] 3
> x[1,]
[1] 1 3 5
> x[,2]
[1] 3 4
> x[1,2,drop = FALSE]
     [,1]
[1,]    3
> x[1,,drop = FALSE]
     [,1] [,2] [,3]
[1,]    1    3    5
Partial Matching
Partial matching of names is allowed with `[[` and `$`.
> x <- list(aardvark = 1:5)
> x$a
[1] 1 2 3 4 5
> x[["a"]]
NULL
> x[["a",exact = FALSE]]
[1] 1 2 3 4 5
Removing NA Values
> x <- c(1,2,NA,4,NA,5)
> bad <- is.na(x)
> x[!bad]
[1] 1 2 4 5
Use the built-in function `complete.cases`
> x <- c(1,2,NA,4,NA,5)
> y <- c("a","b",NA,"d",NA,"f")
> good <- complete.cases(x,y)
> good
[1] TRUE TRUE FALSE TRUE FALSE TRUE
> x[good]
[1] 1 2 4 5
> y[good]
[1] "a" "b" "d" "f"
From a data frame
> airquality[1:6,] ## show the first six rows of the data frame
  Ozone Solar.R Wind Temp Month Day
1    41     190  7.4   67     5   1
2    36     118  8.0   72     5   2
3    12     149 12.6   74     5   3
4    18     313 11.5   62     5   4
5    NA      NA 14.3   56     5   5 ## this row contains NA values
6    28      NA 14.9   66     5   6 ## this row contains an NA value
> good <- complete.cases(airquality) ## rows 5 and 6 contain NA values, so they are filtered out
> airquality[good,][1:6,]
  Ozone Solar.R Wind Temp Month Day
1    41     190  7.4   67     5   1
2    36     118  8.0   72     5   2
3    12     149 12.6   74     5   3
4    18     313 11.5   62     5   4
7    23     299  8.6   65     5   7
8    19      99 13.8   59     5   8
Vectorized Operations
> x <- 1:4; y <- 6:9
> x + y
[1] 7 9 11 13
> x > 2
[1] FALSE FALSE TRUE TRUE
> y >= 2
[1] TRUE TRUE TRUE TRUE
> y == 8
[1] FALSE FALSE TRUE FALSE
> x * y
[1] 6 14 24 36
> x / y
[1] 0.1666667 0.2857143 0.3750000 0.4444444
Logic Control
if-else
> if (x > 3) { ## `x` must be a single value here; a condition of length > 1 is an error in R >= 4.2
+ y <- 10
+ } else {
+ y <- 0
+ }
For
> x <- c("a","b","c","d")
> for (i in 1:4) {
+ print(x[i])
+ }
[1] "a"
[1] "b"
[1] "c"
[1] "d"
> for(i in seq_along(x)) {
+ print(x[i])
+ }
[1] "a"
[1] "b"
[1] "c"
[1] "d"
> for(letter in x){
+ print(letter)
+ }
[1] "a"
[1] "b"
[1] "c"
[1] "d"
> for(i in 1:4) print(x[i])
[1] "a"
[1] "b"
[1] "c"
[1] "d"
While
> count <- 0
> while(count < 10) {
+ print(count)
+ count <- count + 1
+ }
[1] 0
[1] 1
[1] 2
[1] 3
[1] 4
[1] 5
[1] 6
[1] 7
[1] 8
[1] 9
> z <- 5
> while(z >=3 && z <= 10) {
+ print(z)
+ coin <- rbinom(1,1,0.5)
+
+ if(coin == 1) {
+ z <- z + 1
+ } else {
+ z <- z - 1
+ }
+ }
[1] 5
[1] 4
[1] 3
[1] 4
[1] 5
[1] 4
[1] 5
[1] 4
[1] 3
Repeat
> x0 <- 1
> tol <- 1e-8
> repeat {
+ x1 <- computeEstimate()
+ if(abs(x1 - x0) < tol) {
+ break
+ } else {
+ x0 <- x1
+ }
+ }
> for(i in 1:100) {
+ if(i <= 20) {
+ next ## skip the rest of the body and move on to the next iteration
+ }
+ }
Function
> add2 <- function(x,y) {
+ x + y
+ }
> add2(2,3)
[1] 5
> above <- function(x,n = 10) {
+ use <- x >n
+ x[use]
+ }
> x <- 1:20
> above(x,10)
[1] 11 12 13 14 15 16 17 18 19 20
> columnmean <- function(y,removeNA = TRUE) {
+ nc <- ncol(y)
+ means <- numeric(nc)
+ for(i in 1:nc) {
+ means[i] <- mean(y[,i],na.rm = removeNA)
+ }
+ means ## return result
+ }
> columnmean(airquality) ## compute the mean of each column of `airquality`.
[1] 42.129310 185.931507 9.957516 77.882353 6.993464 15.803922
The
|
