#
# In this example of an R session, lines beginning with # are comments. Other lines are R commands, which you can
# run either by typing directly into an R command window or (recommended for anything complicated) using a
# cut-and-paste operation from a text-edit window (so you can more easily correct mistakes and keep a record of
# your work)
#
# 1. USING R AS A CALCULATOR
#
# typing an arithmetic expression evaluates it and prints the answer
2 + 3
(1 + 3 * 7) / 12
# to keep the result of a calculation you must assign it to a named variable.
# The assignment operator <- is typed as two characters, < immediately
# followed by - (no space between them), and is usually read as "gets".
# It stores the value on its right in the named object on its left.
x <- 3
# typing the name of a variable on its own prints its value on the screen
x
# all the standard mathematical functions are available
x <- sqrt(exp(2.5) + cos(0.23))
x
# the c() function combines several numbers into a single vector that can be
# saved as one variable; arithmetic then acts on every element at once
x <- c(1, 2, 3, 4, 5, 7, 8, 12, 20)
x * x
#
# 2. SIMPLE GRAPH-PLOTTING
#
# the : (colon) operator generates a run of consecutive integers. Here the
# integers 1 to 100 are stored in a vector named x, and the first 5 values
# are displayed on the screen
x <- 1:100
x[1:5]
# y holds the square of each element of x
y <- x * x
plot(x, y)
# plots can be customised in (almost) any way you like. Try each of these:
plot(x, y, pch = "+")             # pch sets the plotting symbol
plot(x, y, type = "l")            # type = "l" joins the points with a line
plot(x, y, type = "l", lwd = 3)   # lwd sets the line width
plot(x, y, type = "l", col = "red", xlab = "input", ylab = "output")
#
# 3. SIMULATING DATA
#
# simulate data-pairs (x,y) such that there is a "noisy" straight-line
# relationship between the two. In technical terms, the simulated data are
# from a linear regression model whose residuals are Normally distributed
# with standard deviation 12
mu <- 2 + 0.5 * x                # this defines the straight line
# one noise value per element of x, so the lengths always agree
z <- rnorm(length(x), sd = 12)   # this defines the random noise
y <- mu + z                      # this adds the noise to the line
# now plot the data
plot(x, y)
# add the straight-line relationship to the plot
lines(x, mu)
# plot the histogram of the residuals (the "noisy" part of the data)
hist(z)
# you can find out what any function does by typing ? followed by the name of
# the function (without any spaces). This opens up the function's "help page"
# (but not all help pages are as helpful as they might be)
?hist
# customize the data-plot using optional arguments to the plot() function
plot(x, y, pch = 19, col = "red", cex = 0.5, main = "plot of x vs y")
# if you don't know the correct line (and you never do with real data) you
# can estimate it using a method called "linear regression", or "line of
# best fit"
#
# fit the linear regression model to the simulated data
# and summarise the result
fit <- lm(y ~ x)
summary(fit)
# list the names of the components of the R object that stores
# information about the fitted model
names(fit)
# these components are individually accessible using the $ sign to
# indicate which component you want. Spell the component name in full:
# an abbreviation such as fit$coef only works through partial matching
fit$coefficients
# alpha and beta are the estimated intercept and slope
alpha <- fit$coefficients[1]
beta <- fit$coefficients[2]
# add the fitted regression line to the scatterplot, and compare it with
# the true relationship (plotted in red)
lines(x, alpha + beta * x)
lines(x, mu, col = "red")
#
# 4. READING AND ANALYSING DATA
#
# now read in some real data, stored as a .csv file. The following
# commands assume that you are running R from your desktop and that the
# data are in a file called "LiberiaRemoData.csv" in a folder on the desktop
# called "mydata"
# you can download these data from the website www.lancs.ac.uk/staff/diggle/Malawi2015
data_file <- file.path("mydata", "LiberiaRemoData.csv")
# if the file is not in that location, fall back on the file.choose()
# function: you will be prompted to browse through your folders and select
# the file you want
if (!file.exists(data_file)) {
  data_file <- file.choose()
}
data <- read.csv(data_file)
#
# display the "names" of the columns of the data-file, then do
# some simple things with the data
names(data)
hist(data$ntest)
# prev is the ratio npos/ntest, calculated separately for each row
prev <- data$npos / data$ntest
mean(prev)
range(prev)
# each call to plot() draws a new plot, so by default the second one
# replaces the first in the graphics window
plot(data$lat, prev)
plot(data$long, prev)
# ...etc
#
# now quit R - you will be invited to save your work if you wish, in
# which case it will be loaded when you next run the R program
q()