# Start the session from a clean slate: delete every object that ls() lists
# in the current environment (ls() does not list dot-prefixed names by
# default, so those are kept).
rm(list = ls())

###########################
##### LAB. SESSION 1: #####
## AN INTRODUCTION TO R ###
###########################

# Slide 2
# Call a function, open its manual page, and list every visible object
# whose name contains "sq".
sqrt(4)
help("sqrt")
apropos("sq")

# Slide 4
# Build a numeric vector with c() and print it.
x <- c(1, 0.3, 4, 5, -12, -4.12)
x

# c() also concatenates existing vectors: x, a single 0, then x again.
y <- c(x, 0, x)
y

# Arithmetic is element-wise, so v[i] equals 2 * x[i] + abs(x[i]).
v <- 2 * x + abs(x)
v[3]
# The same value computed by hand from x[3] = 4; there is no squaring in
# the definition of v, so none appears in the check either.
2 * 4 + abs(4)

# sum() gives the same total as adding the six elements one by one.
sum(x)

x[1] + x[2] + x[3] + x[4] + x[5] + x[6]


# mean() is the total divided by the number of elements.
mean(x)

(x[1] + x[2] + x[3] + x[4] + x[5] + x[6]) / length(x)

sum(x) / length(x)

# Slide 7 - Generating regular sequences
# Three ways of building the integers from -4 to 4.
# 1) Typing every value by hand (error-prone: it is easy to skip one).
x1 <- c(-4, -3, -2, -1, 0, 1, 2, 3, 4)

# 2) seq() with an explicit start, end and step.
x2 <- seq(from = -4, to = 4, by = 1)
x2

# 3) The colon operator, a shorthand for steps of 1.
x3 <- -4:4
x3

# Slide 7 - Repeating values
# Repeat a single value five times.
x <- rep(1, times = 5)
x

# Repeat the whole sequence 1, 2, 3 three times: 1 2 3 1 2 3 1 2 3.
y <- rep(1:3, times = 3)
y

# Repeat each element three times before moving on: 1 1 1 2 2 2 3 3 3.
z <- rep(1:3, each = 3)
z

# Slide 7 - Logical vectors
# Grid from -3 to 3 in steps of 0.5.
x <- seq(from = -3, to = 3, by = 0.5)
x

# A comparison returns one TRUE/FALSE per element: strictly positive ...
y1 <- x > 0
y1

# ... versus non-negative (zero now counts as TRUE).
y2 <- x >= 0
y2

# Slide 8 - Character vectors
# A vector of strings.
x <- c("Bye", "Arrivederci", "Ndapita", "Caraysiiyo")
x

# paste() is vectorised: the name is appended to every element of x,
# separated by a single space.
y <- paste(x, "Mr. Alinafe", sep = " ")
y

# Slide 9 - Missing values
# A vector with one missing value in position 12.
x <- c(0:10, NA, 12)
x

# is.na() flags the missing entries; negating the flag marks the
# observed ones.
ind <- is.na(x)
ind
!ind

# Keep only the observed values.
y <- x[!ind]
y

# Slide 9 - Selecting and modifying a subset of the data
# Grid from -0.5 to 3 in steps of 0.5.
x <- seq(from = -0.5, to = 3, by = 0.5)
x

# Logical indexing: keep the elements that exceed 0.51.
y <- x[x > 0.51]
y

# Slide 9 - Matrices 
# A 5 x 3 matrix filled with NA.
M <- matrix(NA, nrow = 5, ncol = 3)

# Fill single cells with [row, column] indexing.
M[2, 3] <- 1
# A negative index drops that row: every row except the second, column 1.
M[-2, 1] <- -2
M

# Sub-matrix made of rows 1 to 3 and columns 1 and 3.
M[1:3, c(1, 3)]

# Slide 9 - Unordered and ordered factors
# Unordered factor: 3 x England, 1 x Italy, 10 x Malawi, 5 x Somalia.
state <- rep(c("England", "Italy", "Malawi", "Somalia"),
             times = c(3, 1, 10, 5))
state <- factor(state)
levels(state)
table(state)

# Ordered factor: the levels (sorted income brackets) carry a ranking.
income <- rep(c("(1000-3000]", "(3000-5500]", "(5500-10000]"),
              times = c(5, 3, 2))
income <- factor(income, ordered = TRUE)
income

# Slide 10 - Lists
# A list can hold elements of different types under different names.
Ls <- list(
  name = "Emanuele",
  age = 27,
  Male = TRUE,
  weight = NA
)
str(Ls)

# Extract one element by name or by position (age is the second element).
Ls[["age"]]
Ls[[2]]

# Slide 10 - Data frame
# A data frame: one row per individual, one column per variable.
data <- data.frame(
  ID = c("A", "B", "C", "D"),
  age = c(20, 21, 24, NA),
  gender = c("M", "M", "F", "F"),
  employed = c(1, 1, 1, 0),
  state = c("Germany", "France", "England", "Italy")
)
str(data)

# Select the row whose ID is "B", keeping all the columns.
data[data[["ID"]] == "B", ]

# Slide 11
# Show the arguments (and defaults) accepted by the two import functions.
args(read.table)
args(read.csv)

# Read the Liberia data set straight from its URL (needs network access).
data.Liberia <- read.csv(
  file = "http://www.lancaster.ac.uk/staff/diggle/Malawi2015/LiberiaRemoData.csv"
)
# or data.Liberia <- read.csv(file.choose())
# or data.Liberia <- read.csv("file directory")
# Inspect the structure of the imported data.
str(data.Liberia)

# Save a copy as Liberia.txt in the working directory, without the
# row names.
write.table(
  data.Liberia,
  file = "Liberia.txt",
  row.names = FALSE
)

# Slide 14
# Zero-mean Gaussian densities for three standard deviations. The first
# call draws the axes and titles; the next two overlay onto that plot.
curve(
  dnorm(x, mean = 0, sd = 0.8),
  xlim = c(-5, 5),
  xlab = "x",
  ylab = "f(x)",
  main = "Gaussian Distribution - density function",
  col = 1,
  lty = 1
)
curve(dnorm(x, mean = 0, sd = 1), xlim = c(-5, 5), add = TRUE, col = 2, lty = 2)
curve(dnorm(x, mean = 0, sd = 2), xlim = c(-5, 5), add = TRUE, col = 3, lty = 3)

# Legend with its top-left corner at (-4, 0.4), matching the colours and
# line types used above.
legend(x = -4, y = 0.4, legend = c("sd=0.8", "sd=1", "sd=2"),
       col = 1:3, lty = 1:3)

# Slide 15
# Split the graphics device into one row and two columns of panels.
par(mfrow = c(1, 2))

# Left panel: cumulative distribution functions for the three standard
# deviations.
curve(
  pnorm(x, mean = 0, sd = 0.8),
  xlim = c(-5, 5),
  xlab = "x",
  ylab = "F(x)",
  main = "Gaussian Distribution - CDF",
  col = 1,
  lty = 1
)
curve(pnorm(x, mean = 0, sd = 1), xlim = c(-5, 5), add = TRUE, col = 2, lty = 2)
curve(pnorm(x, mean = 0, sd = 2), xlim = c(-5, 5), add = TRUE, col = 3, lty = 3)

legend(x = -4, y = 1, legend = c("sd=0.8", "sd=1", "sd=2"),
       col = 1:3, lty = 1:3)

# Right panel: quantile functions, drawn over probabilities between
# 0 and 1.
curve(
  qnorm(x, mean = 0, sd = 0.8),
  xlim = c(0, 1),
  xlab = "x",
  ylab = "q(x)",
  main = "Gaussian Distribution - quantile function",
  col = 1,
  lty = 1
)
curve(qnorm(x, mean = 0, sd = 1), xlim = c(0, 1), add = TRUE, col = 2, lty = 2)
curve(qnorm(x, mean = 0, sd = 2), xlim = c(0, 1), add = TRUE, col = 3, lty = 3)

legend(x = 0, y = 1.9, legend = c("sd=0.8", "sd=1", "sd=2"),
       col = 1:3, lty = 1:3)

# Slide 16
# Parameters of the Gaussian to simulate from: mean, variance, sample size.
mu <- 3
sigma2 <- 1.78
n <- 1000

# Fix the seed so the simulated sample is reproducible. rnorm() takes a
# standard deviation, hence the square root of the variance.
set.seed(123)
x <- rnorm(n, mean = mu, sd = sqrt(sigma2))

# Two stacked panels.
par(mfrow = c(2, 1))

# Top: histogram on the density scale with the true density overlaid in red.
hist(x, probability = TRUE, nclass = 20)
curve(dnorm(x, mean = mu, sd = sqrt(sigma2)), col = 2, add = TRUE)

# Bottom: empirical CDF of the sample with the true CDF overlaid in red.
plot(ecdf(x), main = "ECDF of x")
curve(pnorm(x, mean = mu, sd = sqrt(sigma2)), col = 2, add = TRUE)

# Slide 18 - Conditional executions
# The braces run only when the condition is TRUE, so x goes from 4 to 5.
x <- 4
if (x == 4) {
  x <- 5
}
x

# Slide 18 - For loops
# Fibonacci numbers with a for loop: two starting values followed by
# n.iter further terms.
n.iter <- 20
fib <- rep(NA, n.iter + 2)
fib[1:2] <- c(0, 1)

# Each new term is the sum of the two terms before it.
for (i in 3:(n.iter + 2)) {
  fib[i] <- fib[i - 2] + fib[i - 1]
}

# Slide 18 - While loops
# The same Fibonacci sequence with a while loop: the counter is advanced
# by hand and the loop stops once n.iter terms have been added.
n.iter <- 20
counter <- 1
fib <- rep(NA, n.iter + 2)
fib[1:2] <- c(0, 1)

while (counter <= n.iter) {
  # Positions counter and counter + 1 are already filled; write the next.
  fib[counter + 2] <- fib[counter] + fib[counter + 1]
  counter <- counter + 1
}
fib

# Slide 
# Element-wise sum of two equal-length vectors, written with an explicit
# loop for teaching purposes (R's built-in `+` gives the same values).
#
# Arguments:
#   x, y  vectors of the same length whose elements can be added with `+`.
# Value:
#   A vector z with z[i] = x[i] + y[i]. Zero-length inputs give a
#   zero-length result.
# Errors:
#   Stops when length(x) differs from length(y); no recycling is done.
sum.vectors <- function(x, y) {
  n.x <- length(x)
  n.y <- length(y)
  if (n.x != n.y) stop("'x' and 'y' must be of the same length")

  # Preallocate the result instead of growing it inside the loop.
  z <- rep(NA, n.x)
  # seq_len() is empty when n.x is 0, whereas 1:n.x would run over
  # c(1, 0) and write a spurious NA into z.
  for (i in seq_len(n.x)) {
    z[i] <- x[i] + y[i]
  }
  z
}

# Equal lengths: the loop-based function and R's vectorised `+` return
# the same values.
sum.vectors(1:4, 2:5)
1:4 + 2:5

# Unequal lengths: sum.vectors() signals an error. try() prints that
# error but lets the script carry on when it is run with source().
try(sum.vectors(1:3, 2:5))
# The built-in `+` instead recycles the shorter vector and only warns.
1:3 + 2:5