###########################
##### LAB. SESSION 1: #####
## AN INTRODUCTION TO R ###
###########################

# Run this script in a fresh R session instead of clearing the workspace
# with rm(list = ls()): that call deletes every object the user already has
# in the global environment, yet leaves loaded packages and options as is.

# Slide 2 - Calling a function and looking up help
sqrt(4)
?sqrt
apropos("sq")

# Slide 4 - Numeric vectors and basic arithmetic
x <- c(1, 0.3, 4, 5, -12, -4.12)
x
y <- c(x, 0, x)
y
v <- 2 * x + abs(x)
v[3]
# NOTE(review): this evaluates to 36 whereas v[3] above is 12. If it is meant
# as a by-hand check of v[3], either `v` or this line needs changing - confirm
# against the slides.
2 * 4^2 + abs(4)

# Three equivalent ways of adding up the elements of x
sum(x)
x[1] + x[2] + x[3] + x[4] + x[5] + x[6]

# Three equivalent ways of computing the arithmetic mean of x
mean(x)
(x[1] + x[2] + x[3] + x[4] + x[5] + x[6]) / length(x)
sum(x) / length(x)

# Slide 7 - Generating regular sequences
x1 <- c(-4, -2, -1, 0, 1, 2, 3, 4)
x2 <- seq(from = -4, to = 4, by = 1)
x2
x3 <- -4:4
x3

# Slide 7 - Repeating values
x <- rep(1, 5)
x
y <- rep(1:3, 3)         # whole vector repeated: 1 2 3 1 2 3 1 2 3
y
z <- rep(1:3, each = 3)  # each element repeated: 1 1 1 2 2 2 3 3 3
z

# Slide 7 - Logical vectors
x <- seq(-3, 3, by = 0.5)
x
y1 <- x > 0
y1
y2 <- x >= 0
y2

# Slide 8 - Character vectors
x <- c("Bye", "Arrivederci", "Ndapita", "Caraysiiyo")
x
y <- paste(x, "Mr. Alinafe")
y

# Slide 9 - Missing values
x <- c(0:10, NA, 12)
x
ind <- is.na(x)  # TRUE where x is missing
ind
!ind
y <- x[!ind]     # keep only the non-missing entries
y

# Slide 9 - Selecting and modifying a subset of the data
x <- seq(-0.5, 3, by = 0.5)
x
y <- x[x > 0.51]
y

# Slide 9 - Matrices
M <- matrix(NA, nrow = 5, ncol = 3)
M[2, 3] <- 1
M[c(1, 3, 4, 5), 1] <- -2
M
M[1:3, c(1, 3)]

# Slide 9 - Unordered and ordered factors
state <- c(rep("England", 3), "Italy", rep("Malawi", 10), rep("Somalia", 5))
state <- factor(state)
levels(state)
table(state)

income <- c(rep("(1000-3000]", 5), rep("(3000-5500]", 3),
            rep("(5500-10000]", 2))
# Give the level order explicitly. Without `levels`, factor() sorts the labels
# as strings, which matches the numeric order of these bands only by
# coincidence.
income <- factor(income,
                 levels = c("(1000-3000]", "(3000-5500]", "(5500-10000]"),
                 ordered = TRUE)
income

# Slide 10 - Lists
Ls <- list(name = "Emanuele", age = 27, Male = TRUE, weight = NA)
str(Ls)
Ls$age   # extract by name
Ls[[2]]  # extract by position (same element)

# Slide 10 - Data frame
data <- data.frame(ID = c("A", "B", "C", "D"),
                   age = c(20, 21, 24, NA),
                   gender = c("M", "M", "F", "F"),
                   employed = c(1, 1, 1, 0),
                   state = c("Germany", "France", "England", "Italy"))
str(data)
data[data$ID == "B", ]

# Slide 11 - Reading and writing data
args(read.table)
args(read.csv)

# Read the data from the web. A failed download is reported as a warning
# rather than an error so that the rest of the script can still be run.
data.Liberia <- tryCatch(
  read.csv("http://www.lancaster.ac.uk/staff/diggle/Malawi2015/LiberiaRemoData.csv"),
  error = function(e) {
    warning("Could not read the Liberia data: ", conditionMessage(e),
            call. = FALSE)
    NULL
  }
)
# Alternatives for a local copy of the file (uncomment one of them):
# data.Liberia <- read.csv(file.choose())     # choose the file interactively
# data.Liberia <- read.csv("file directory")  # replace with the file path

if (!is.null(data.Liberia)) {
  str(data.Liberia)
  # Writes Liberia.txt into the current working directory.
  write.table(data.Liberia, file = "Liberia.txt", row.names = FALSE)
}

# Slide 14
# Gaussian density for three standard deviations, drawn on common axes.
# The first call sets up the plot; the others are overlaid with add = TRUE.
curve(dnorm(x, mean = 0, sd = 0.8), xlim = c(-5, 5), col = 1, lty = 1,
      ylab = "f(x)", xlab = "x",
      main = "Gaussian Distribution - density function")
curve(dnorm(x, mean = 0, sd = 1), xlim = c(-5, 5), col = 2, lty = 2,
      add = TRUE)
curve(dnorm(x, mean = 0, sd = 2), xlim = c(-5, 5), col = 3, lty = 3,
      add = TRUE)
legend(-4, 0.4, c("sd=0.8", "sd=1", "sd=2"), col = 1:3, lty = 1:3)

# Slide 15 - Distribution and quantile functions side by side
# par() returns the previous settings; keep them so that the plotting layout
# can be restored once the multi-panel figures are done.
old.par <- par(mfrow = c(1, 2))

curve(pnorm(x, mean = 0, sd = 0.8), xlim = c(-5, 5), col = 1, lty = 1,
      ylab = "F(x)", xlab = "x", main = "Gaussian Distribution - CDF")
curve(pnorm(x, mean = 0, sd = 1), xlim = c(-5, 5), col = 2, lty = 2,
      add = TRUE)
curve(pnorm(x, mean = 0, sd = 2), xlim = c(-5, 5), col = 3, lty = 3,
      add = TRUE)
legend(-4, 1, c("sd=0.8", "sd=1", "sd=2"), col = 1:3, lty = 1:3)

curve(qnorm(x, mean = 0, sd = 0.8), xlim = c(0, 1), col = 1, lty = 1,
      ylab = "q(x)", xlab = "x",
      main = "Gaussian Distribution - quantile function")
curve(qnorm(x, mean = 0, sd = 1), xlim = c(0, 1), col = 2, lty = 2,
      add = TRUE)
curve(qnorm(x, mean = 0, sd = 2), xlim = c(0, 1), col = 3, lty = 3,
      add = TRUE)
legend(0, 1.9, c("sd=0.8", "sd=1", "sd=2"), col = 1:3, lty = 1:3)

# Slide 16 - Simulated sample compared with the theoretical distribution
mu <- 3
sigma2 <- 1.78  # variance; rnorm/dnorm/pnorm take the standard deviation
n <- 1000
set.seed(123)   # fixed seed so that the simulated sample is reproducible
x <- rnorm(n, mean = mu, sd = sqrt(sigma2))

par(mfrow = c(2, 1))
# Histogram on the density scale so the theoretical density can be overlaid.
hist(x, probability = TRUE, nclass = 20)
curve(dnorm(x, mean = mu, sd = sqrt(sigma2)), add = TRUE, col = 2)
plot(ecdf(x), main = "ECDF of x")
curve(pnorm(x, mean = mu, sd = sqrt(sigma2)), add = TRUE, col = 2)

# Restore the plotting layout that was active before slide 15.
par(old.par)

# Slide 18 - Conditional executions
x <- 4
if (x == 4) {
  x <- 5
}
x

# Slide 18 - For loops
# Fibonacci sequence: two starting values followed by n.iter further terms.
n.iter <- 20
fib <- rep(NA, n.iter + 2)
fib[1] <- 0
fib[2] <- 1
# seq_len() yields an empty index set when n.iter is 0, whereas
# 3:(n.iter + 2) would count down from 3 to 2.
for (i in seq_len(n.iter) + 2) {
  fib[i] <- fib[i - 1] + fib[i - 2]
}

# Slide 18 - While loops
# Same sequence, now driven by an explicit counter.
n.iter <- 20
counter <- 1
fib <- rep(NA, n.iter + 2)
fib[1] <- 0
fib[2] <- 1
while (counter <= n.iter) {
  fib[counter + 2] <- fib[counter + 1] + fib[counter]
  counter <- counter + 1
}
fib

# Slide - Writing functions

# Element-wise sum of two vectors of equal length.
#
# Arguments:
#   x, y  vectors that can be added with `+`; they must have the same length.
# Value:
#   A vector of length(x) whose i-th element is x[i] + y[i]; an empty vector
#   when both inputs are empty.
# Errors:
#   Stops with an error if `x` and `y` differ in length.
sum.vectors <- function(x, y) {
  n.x <- length(x)
  n.y <- length(y)
  if (n.x != n.y) stop("'x' and 'y' must be of the same length")
  z <- rep(NA, n.x)
  # seq_len() rather than 1:n.x, so that empty input skips the loop instead
  # of iterating over c(1, 0) and returning a spurious NA.
  for (i in seq_len(n.x)) {
    z[i] <- x[i] + y[i]
  }
  z
}

# Same result as the built-in vectorised addition
sum.vectors(1:4, 2:5)
1:4 + 2:5

# Different lengths: sum.vectors() refuses, whereas `+` recycles the shorter
# vector and only warns. try() reports the deliberate error without aborting
# the script, so that the comparison below is still evaluated.
try(sum.vectors(1:3, 2:5))
1:3 + 2:5