#
# In this example of an R session, lines beginning with # are comments.
# Other lines are R commands, which you can run either by typing directly
# into an R command window or (recommended for anything complicated) using
# a cut-and-paste operation from a text-edit window (so you can more easily
# correct mistakes and keep a record of your work)
#
# 1. USING R AS A CALCULATOR
#
2 + 3
(1 + 3 * 7) / 12
# if you want to save the result of a calculation, you have to assign it to
# a named variable. The "assignment" character <- is actually two
# characters, < followed by - without any spaces, and is usually called
# "gets". Its effect is to save the result of the calculation in a named
# object.
x <- 3
# if you type the name of a variable, its value is printed on the screen
x
# all the standard mathematical functions are there
x <- sqrt(exp(2.5) + cos(0.23))
x
# you can combine several numbers using the c() function, and save the
# result as a single variable
x <- c(1, 2, 3, 4, 5, 7, 8, 12, 20)
# arithmetic is applied element by element, so this squares each number
x * x
#
# 2. SIMPLE GRAPH-PLOTTING
#
# use the : (colon) character to assign the integers 1 to 100 to a vector,
# with name x, and display the first 5 values on the screen
x <- (1:100)
x[1:5]
y <- x * x
plot(x, y)
# you can customise your plots in (almost) any way you like. Try each of
# the following:
plot(x, y, pch = "+")    # pch sets the plotting character
plot(x, y, type = "l")   # type = "l" joins the points with a line
plot(x, y, type = "l", lwd = 3)   # lwd sets the line width
plot(x, y, type = "l", col = "red", xlab = "input", ylab = "output")
#
# 3. SIMULATING DATA
#
# simulate data-pairs (x,y) such that there is a "noisy" straight-line
# relationship between the two.
# In technical terms, the simulated data are from a linear regression
# model whose residuals are Normally distributed with standard deviation 12
# (x is the vector of integers 1 to 100 created in section 2)
mu <- 2 + 0.5 * x            # this defines the straight-line
z <- 12 * rnorm(length(x))   # this defines the random noise, one per x
y <- mu + z                  # this adds the noise to the line
# now plot the data
plot(x, y)
# add the straight-line relationship to the plot
lines(x, mu)
# plot the histogram of the residuals (the "noisy" part of the data)
hist(z)
# you can find out what any function does by typing ? followed by the name
# of the function (without any spaces). This opens up the function's
# "help page" (but not all help pages are as helpful as they might be)
?hist
# customize the data-plot using optional arguments to the plot() function
plot(x, y, pch = 19, col = "red", cex = 0.5, main = "plot of x vs y")
# if you don't know the correct line (and you never do with real data) you
# can estimate it using a method called "linear regression", or "line of
# best fit"
#
# fit the linear regression model to the simulated data
# and summarise the result
fit <- lm(y ~ x)
summary(fit)
# list the names of the components of the R object that stores
# information about the fitted model
names(fit)
# these components are individually accessible using the $ sign to
# indicate which component you want. The component name is written in
# full here: a shortened form such as fit$coef only works because $ does
# partial matching on lists, which is easy to get wrong
fit$coefficients
# alpha and beta are the estimated intercept and slope
alpha <- fit$coefficients[1]
beta <- fit$coefficients[2]
# add the fitted regression line to the scatterplot, and compare it with
# the true relationship (plotted in red)
lines(x, alpha + beta * x)
lines(x, mu, col = "red")
#
# 4. READING AND ANALYSING DATA
#
# now read in some real data, stored as a .csv file.
# The following command assumes that you are running R from your desktop
# and that the data are in a file called "LiberiaRemoData.csv" in a folder
# on the desktop called "mydata"
# you can download these data from the website
# www.lancs.ac.uk/staff/diggle/Malawi2015
data <- read.csv("mydata/LiberiaRemoData.csv")
# alternatively, you can use the file.choose() function as follows
data <- read.csv(file.choose())
# you will now be prompted to browse through your folders and select the
# file you want
#
# display the "names" of the columns of the data-file, then do
# some simple things with the data
names(data)
hist(data$ntest)
# NOTE(review): presumably npos / ntest is the proportion of positive
# tests at each location - confirm against the description of the data
prev <- data$npos / data$ntest
mean(prev)
range(prev)
plot(data$lat, prev)
plot(data$long, prev)
# ...etc
#
# now quit R - you will be invited to save your work if you wish, in
# which case it will be loaded when you next run the R program
q()