#################################################################
###                    Introduction to R                      ###
#################################################################

# Download R: http://www.r-project.org/

# R is an interpreted language => you do not need to declare the objects you use
# It is possible:
# - to write directly on the console (EX: make a simple sum between 2 numbers)
# - (better) to write the code in a separate script (like this text editor) and run it

# To run the code from the editor to the console:
# - Cut and paste the selected lines from the editor to the console (not efficient)
# - R              : ctrl + r, or select the rows and press the |->| button
# - RStudio        : ctrl + enter
# - R and RStudio  : source("file.R")

# e.g. try running these two lines:
1
test <- 1

# Comments: anything after '#' is a comment, i.e., it is not executed by the R console
# DO NOT FORGET TO COMMENT YOUR CODE!!!!

#######################
#    Preliminaries    #
#######################

# IMPORTANT: like Matlab, SAS and many other programs, R needs a working directory

#--- To select the working directory: ---#
# - if RStudio
#   > select "Session" -> 'Set Working Directory' -> 'Choose Directory'
# - if R
#   > select "File" -> 'Change Working Directory'
# - command line
#   > setwd("path of the folder where data are contained and files have to be saved")
# (placeholder path: replace it with a folder that exists on your machine)
setwd("your/working/directory/")

#--- To show the current working path: ---#
getwd()

#--- To SAVE objects ---#
# From the workspace (R and RStudio):
# - Select the script/editor window, then click 'File' -> 'Save' or 'Save as'
# From the console (R and RStudio):
# - To save the whole workspace (.RData file); the same file name is used
#   by load() in the "To LOAD objects" section
save.image("work_so_far.RData")
# - You may also save only some objects
#   (template: object1 and object2 stand for objects that exist in your workspace):
save(object1, object2, file = "file name.RData") # not the best option
# - if objects are vectors, tables, matrices or data
#   (template: object stands for an object that exists in your workspace):
write.table(object, "object name.txt") # save in .txt format (the easiest to be loaded)

#--- To LOAD objects ---#
# To load RData files:
load("work_so_far.RData")

#--- To LIST objects ---#
# To list
# the objects defined and saved in the workspace:
ls()
# To list the files contained in the working directory:
dir()
# To list the last n commands executed
history(5)

#--- To REMOVE objects ---#
# rm(list of objects to be removed)
rm(list = ls(all.names = TRUE)) # Remove ALL the objects
# Note: it is important to clean the memory. Since in R you don't need to
# declare variables nor delete them once used, it is a safe habit to clean
# the workspace regularly, to save memory and to avoid accidentally
# overwriting or reusing old objects

##########
## HELP ##
##########

#--- To recall the info associated to a command ---#
# help(name of the command) or ?nameofthecommand
help(hist)
?hist
# More in general
help.start()

#############
## LIBRARY ##
#############

# What are R packages?
# In general, they are archives of .R files (or files from other languages) and data,
# which enable the definition of particular kinds of functions or the execution of
# complex functions for specific analyses.

#--- To INSTALL a package ---#
# R       : 'Packages' -> 'Install Packages' -> ... select a city (usually Italy(Padua)) and then the name of the package
# RStudio : 'Tools' -> 'Install Packages' (much easier and helpful)
# From the CRAN it is also possible to download the .zip version of a package.
# You have to put it in your R package folder and then ask for installation from .zip file

#--- To LOAD a package ---#
# In order to use packages, you must load them
library(MASS)
# now all the functions and data contained in the MASS package are available to your workspace

########################
## Data Import/Export ##
########################

# Import the file "record.txt"
record <- read.table("record.txt", header = TRUE)
# Note:
# - "header=TRUE" specifies that the first line of the file contains column titles
# - if needed, sep="\t" indicates that different entries are separated by a tab
# - if needed, dec="," indicates that the comma is used as decimal separator

# Dimensions and variable names
dim(record)
dimnames(record)

# Show the first lines and the structure
head(record)
str(record)
record[1:5, ]

# Name rows using labels contained in the 8th column
record <- data.frame(record[, 1:7], row.names = record[, 8])

# Name columns
var.names <- c("m100", "m200", "m400", "m800", "m1500", "m3000", "Marathon")
dimnames(record)[[2]] <- var.names
head(record)

# Save the modified version
write.table(record, file = "record_mod.txt")

# Read the saved file
record <- read.table("record_mod.txt", header = TRUE)

### EX1: see the assignments ###

##################
## Algebra in R ##
##################

# The R console may be used as a simple calculator
# - sum:
2 + 2
# - product:
2 * 3
# - subtraction and division:
(5 - 2) / 3
# - power:
17^(1 / 3)
4^2
# others:
sqrt(9)
log(10)
exp(1)

##################
## OBJECTS IN R ##
##################

# You may assign values using <- or =  (and also ->, with the name on the right)

#--- scalars ---#
a <- 1
a
a = 2
a
b <- 3
b
4 -> c
c
a <- b
a
b

#--- logical operators ---#
# <  (<=)  less (or equal)
# >  (>=)  greater (or equal)
# ==       equal to
# !=       different from
# &        and
# |        or
1 < 3
5 > 7
sqrt(9) == 3
sqrt(3) * sqrt(3) - 3 == 0 # !!!!! mind the approximations !!!!!
# (to compare floating-point numbers use a tolerance, e.g.
#  isTRUE(all.equal(sqrt(3) * sqrt(3), 3)) )
sqrt(3) * sqrt(3) - 3 # not exactly 0: floating-point approximation
sqrt(4) * sqrt(4) - 4 == 0
4 != 9

#--- vectors ---#
# c() : function concatenating elements

# numeric vectors
v <- c(2, 7, 3, 10)
v
t(v) # transpose

# character vectors
vv <- c("mother", "father", "son")
vvv <- c(v, vv)
vvv # note that all the items are now char

# extract the second element
v[2]
# extract more elements
v[2:3]
v[c(1, 3)]
# extract everything but the first
v[-1]
# extract everything but the last
v[-length(v)]

# Operations on vectors
min(v)
max(v)
sum(v)
mean(v)
median(v)
range(v) # EX: extract the minimum (maximum) only
sd(v)
var(v)
sort(v)
order(v)
v^2 # Note: element by element

# which()
# useful to search elements within a vector. A logical condition is required.
# The index of the matching element(s) is returned
which(v == 7)

# other particular options for building vectors
# seq
u <- seq(0, 0.5, 0.1) # seq(from, to, by)
u
length(u)
u <- seq(0, 0.5, length.out = 9) # seq(from, to, length.out)
u
u1 <- seq(0.5, 0, -0.1) # decreasing sequence
u1
u2 <- 1:5 # default: unit lag
u2

# rep
w <- rep(1, 10) # repeat 'first argument' 'second argument' times
w
# you may also repeat sequences
w1 <- rep(c(1, 2, 3, 4), 3) # QUIZ: which is the length of w1?
w1

# you can also combine commands
w2 <- rep(1:8, rep(3, 8))
w2
w3 <- rep(seq(0, 10, length.out = 6), 1:6)
w3
w4 <- rep(c(5, 9, 1, 3), c(1, 4, 2, 0))
w4

#--- factors ---#
age <- c(16, 72, 33, 46, 90, 1, 28)
sex <- as.factor(c("M", "M", "M", "F", "M", "M", "F"))
levels(sex)
tapply(age, sex, mean) # mean of age within each level of sex

#--- matrices & arrays ---#
dati <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)
W <- matrix(data = dati, nrow = 4, ncol = 3, byrow = FALSE)
W
# or
W <- array(data = dati, dim = c(4, 3))
W
# or
W <- rbind(c(1, 5, 9), c(2, 6, 10), c(3, 7, 11), c(4, 8, 12)) # built joining rows
W
# or
W <- cbind(1:4, 5:8, 9:12) # built joining columns
W

# Extract elements from the matrix W
dim(W)
W[2, 3]
W[2:4, 1]
W[4, c(1, 3)]

# Extract rows and/or columns
W[3, ]
W[, 2]

# Extract sub-matrices
W[c(1, 3, 4), 2:3]

#--- lists ---#
exam <- list(
  course = "Statistics",
  date.exam = "30/06/2010",
  num_students = 25,
  stu_ID = as.character(c(45020, 45679, 46789, 43126, 42345, 47568, 45674)),
  marks = c(30, 19, 29, 21, 25, 26, 27)
)
exam
exam$marks
# or
exam[[5]]
# a list may also concatenate objects of different size (ex: dataframe and vectors)

#--- data frame ---#
# Data frames are matrices where each column may differ in nature from others
iris # basic dataset in R
head(iris) # shows the first lines of the dataframe
str(iris) # specifies the type of each variable
names(iris)
Sepal.Length[1:5] # error: the column is not visible until the data frame is attached
iris$Sepal.Length[1:5]
attach(iris)
Sepal.Length[1:5]
detach(iris)
Sepal.Length[1:5] # error again: the data frame has been detached
# (iris$Sepal.Length or with(iris, Sepal.Length[1:5]) avoid attach() altogether)

##################
## Algebra in R ##
##################

# NB: the default option in R is to execute operations component by component
a <- 1
b <- 2
c <- c(2, 3, 4)
d <- c(10, 10, 10)
e <- c(1, 2, 3, 4)
f <- 1:6

W # 4x3 matrix defined above
Z <- rbind(rep(0, 3), 1:3, rep(10, 3), c(4, 7, 1))
Z # Z has the same dimensions as W

#--- algebra on scalars and vectors ---#
a + b # scalar + scalar
c + d # vector + vector
a * b # scalar * scalar
c * d # vector * vector
c + a # vector + scalar
c^2 # power
exp(c)

# MIND THE RECYCLING!!!
# When operations are carried out on vectors of different lengths, R still
# returns a result, but it may not be the one you expect.
c
e
c + e # warning, not error!
# The two vectors have different lengths, but R computes their sum anyway.
# How is it possible?
# The component by component sum is carried out up to the end of the shorter vector,
# then R recycles the shorter vector from its first element!

c
f
c + f # recycling happens with no warning, since the length of f is a multiple of the length of c

#--- algebra on matrices ---#
Z
W
Z + W
colSums(Z)
rowSums(Z)
Z * W # element by element: mind the size of the 2 matrices
Z %*% t(W) # matrix product: 4x3 * 3x4 = 4x4

# inverse of a matrix
C <- 3 * diag(4, 3)
solve(C)
solve(W) # error! it is not a square matrix

library(MASS)
ginv(C) # generalized inverse

####################
## Functions in R ##
####################

# A function must be defined (i.e., its definition must be run) before it can be called.
# Specify in parentheses the data and parameters to be provided to the function,
# and assign the function to a name.

# std.err: standard error of the mean of a numeric vector.
#   vector : numeric vector of observations
#   returns: sd(vector) / sqrt(length(vector))
std.err <- function(vector) {
  std.dev <- sd(vector)
  n <- length(vector)
  # The last evaluated expression is the returned value. Ending on an
  # assignment would return the value invisibly, so end on the expression.
  std.dev / sqrt(n)
}

mytrial <- std.err(v)
mytrial

# Note: objects defined inside the function do not need to be declared and exist only during the execution.
# Their content will be deleted at the end of the run.
# The function returns the value of its last evaluated expression
# (or the argument of an explicit return() call).