Basic data objects
Vectors
## Create numeric, character, and logical vectors:
weight <- c( 64, 78, 66, 54 )
name <- c( "Clarence", "Robert", "Elizabeth", "Charlene" )
heavy <- weight > 70
print( heavy )

## Indicate missing data with NA:
height <- c( 62, NA, 65, 72 )

## Functions and operators for creating vectors:
c( 4.17, 4.18, 4.20, 4.16, 4.17 )
print( 1:10 )
seq( from = 4, to = 8, by = 0.5 )
## Spell out length.out rather than relying on partial argument matching.
seq( from = -10, to = 10, length.out = 51 )

## Create a vector containing
## 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
rep( x = 1:2, times = c( 10, 15 ) )

## Create a vector containing
## 1 2 1 2 1 2 1 2
rep( x = 1:2, times = 4 )

## Create a vector containing
## 1 1 1 1 2 2 2 2
rep( x = 1:2, each = 4 )
c( rep( 1, 4 ), rep( 2, 4 ) )

## Built-in vectors:
LETTERS
letters
month.name
month.abb
Matrices
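## The names() function behaves differently for a matrix than for a
## data.frame. A data.frame is a list, so its names attribute holds the
## column names and names() is equivalent to colnames(). A matrix keeps
## its row and column names in the dimnames attribute (see rownames(),
## colnames(), and dimnames()); calling names() on a matrix instead gets
## or sets a names attribute on the individual elements.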
Arrays
Lists
Data.frames
## Create a data.frame using artificial data.
ad <- data.frame( x = rnorm( n = 20 ),
                  y = rpois( n = 20, lambda = 2 ),
                  z = rt( n = 20, df = 12 ) )
## seq_len( nrow() ) stays correct even for a data.frame with no rows.
row.names( ad ) <- letters[ seq_len( nrow( ad ) ) ]

## Get the names of the rows (the observation names)
## as a character vector.
row.names( x = ad )
rownames( x = ad )

## Get the names of the columns (the variable names)
## as a character vector.
names( x = ad )
colnames( x = ad )

## Get an individual column from a data.frame as a vector.
## These all return the same result, a vector.
ad$x          ## All rows, column with name "x"
ad[ , "x" ]   ## All rows, column with name "x"
ad[ , 1 ]     ## All rows, column 1
ad[[ 1 ]]     ## Get component 1 from the data.frame

## Get an individual row from a data.frame as a data.frame.
ad[ 2, ]      ## Returns an object of class data.frame

## Get an individual row from a data.frame as a vector.
## Depending on circumstances, the returned vector is often a
## character vector.
unlist( ad[ 2, ] )   ## Returns a vector

## Create a new data.frame using specified columns of an existing
## data.frame.
ad2 <- ad[ c( 1, 3 ) ]

## Get a summary of a data.frame.
summary( object = ad )

## Apply a function to each column of a data.frame.
## Returns a list the same length as X.
lapply( X = ad, FUN = mean )                         ## returns a list
lapply( X = ad, FUN = quantile, probs = 1:3/4 )      ## returns a list

## Apply a function to each column of a data.frame.
## Returns a vector or a matrix when possible.
sapply( X = ad, FUN = mean )                         ## returns a vector
sapply( X = ad, FUN = quantile, probs = 1:3/4 )      ## returns a matrix

## This example, from Data Analysis and Graphics Using R, by
## John Maindonald and John Braun, p. 15, calculates growth
## rates of Australian populations from 1917 to 1997.
library( package = DAAG )
sapply( X = austpop[ -c( 1, 10 ) ],
        FUN = function( x ) { ( x[ 9 ] - x[ 1 ] ) / x[ 1 ] } )

## Plotting using sapply(). This example is from the solutions to
## Data Analysis and Graphics Using R, by John Maindonald and John Braun,
## p. 18, problem 10. invisible() hides the list returned by sapply, which
## contains NULL components. lapply() can be used here instead of sapply().
library( package = DAAG )
## dev.new() opens the default device whether getOption( "device" ) holds
## a function name or a function object.
dev.new()
oldpar <- par( mfrow = c( 2, 4 ) )
invisible(
    sapply( X = 2:9,
            FUN = function( i, df ) {
                plot( x = df[ , 1 ], y = log( df[ , i ] ),
                      xlab = "Year", ylab = names( df )[ i ],
                      pch = 16, ylim = c( 0, 10 ) )
            },
            df = austpop ) )
## Restore the plotting parameters saved above.
par( oldpar )

## Identify rows with NA values.
library( package = DAAG )
possum[ !complete.cases( possum ), ]   ## 3 rows returned

## Create a new data.frame, omitting rows containing NA.
dim( possum )                          ## 104 rows
newpossum <- na.omit( possum )
dim( newpossum )                       ## 101 rows

## Functions for manipulating vectors, matrices, lists, and data.frames
cbind()
rbind()
aggregate()
data.frame()
data.matrix()
list()
matrix()
merge()
Viewing and Editing
## Return the first or last part of a vector, matrix, data.frame, or
## function.
head( CO2 )
tail( CO2 )

## Other viewing and editing functions:
str( CO2 )
page( CO2 )
file.show()
fix( CO2 )
edit( CO2 )   ## More useful for editing functions
Control flow
## Get help on if, for, while, repeat, break, and next
?"if"

## Loop using for
## Prints 1, 3, 4, 5: next skips 2 and break stops the loop at 6.
for ( i in 1:10 ) {
    if ( i == 2 ) next
    if ( i == 6 ) break
    print( i )
}

## Loop using while
## i is incremented before it is tested, so this prints 3, 4, 5.
limit <- 10
i <- 1
while ( i <= limit ) {
    i <- i + 1
    if ( i == 2 ) next
    if ( i == 6 ) break
    print( i )
}

## Loop using repeat
## repeat has no condition of its own; break is the only way out.
i <- 1
repeat {
    i <- i + 1
    if ( i == 2 ) next
    if ( i == 6 ) break
    print( i )
}

## See also switch(), which needs at least an EXPR argument.
Data sets
Files
See the "Objects" section for saving and loading R data files.
Packages
## Get information about data in packages that have been loaded:
data()

## Get information about data in installed packages.
## The package argument must be a character vector, so quote the name.
data( package = "MASS" )
data( package = .packages( all.available = TRUE ) )   ## all installed packages
Viewing
## Return the first or last part of a vector, matrix, data.frame, or
## function.
head( CO2 )
tail( CO2 )
Demonstrations
## Get lists of available demonstrations:
demo()
demo( package = .packages( all.available = TRUE ) )

## Run some demonstrations:
demo( graphics )
demo( persp )
Devices
## Open a default screen device for any platform.
## dev.new() works whether getOption( "device" ) holds the name of a
## device function or the function object itself.
dev.new()

## Device commands:
dev.cur()
dev.list()
dev.next()
dev.prev()
dev.off()
dev.set()
graphics.off()
capabilities()

## Copy a plot to a PNG file.
## X11 must be running in order to create the PNG file.
## The default name of the file is Rplot001.png, which is created
## in R's working directory.
dev.new()
plot( y = rnorm( 10 ), x = rnorm( 10 ), main = "Example Plot" )
dev.copy( device = png, width = 7 * 72, height = 7 * 72 )
dev.off()

## Copy a plot to a PNG file. The image quality is not as good as that
## produced by dev.copy() (see above).
## X11 must be running.
## ghostscript must be installed; its path can be designated in the
## environment variable R_GSCMD.
dev.new()
plot( y = rnorm( 10 ), x = rnorm( 10 ), main = "Example Plot" )
## Sys.putenv() is defunct; Sys.setenv() is its replacement.
Sys.setenv( R_GSCMD = "/sw/bin/gs" )
dev2bitmap( file = "example.png", type = "png256", height = 7, width = 7 )

dev.print()
dev.copy2eps()
dev.control()

## A dev.new() function that opens a new default device.
## grDevices already provides dev.new(); delegate to it so that this
## definition does not mask the standard function with a weaker one.
dev.new <- function( ... ) grDevices::dev.new( ... )
dev.new()
Documentation and help
Documentation
## View locally installed documentation:
help.start()
Examples
## Run the example code for function persp:
example( persp )

## Make R prompt before plotting each figure.
## par() returns the previous settings, which are restored afterwards.
op <- par( ask = TRUE )
example( persp )
par( op )
Help
help( help )

## help() and ? are synonymous
help( plot )
?plot

## The help pages give example code at the bottom.
## Use example() to run the example code.
example( plot )

## help( function ) gives "Error: syntax error"
## Must put function into quotes
help( "function" )

## Special symbols must be quoted to get help
help( "[[" )
?"[["
help( "<-" )

## Help for packages
library( help = MASS )
help( package = MASS )
package?MASS

## Help for datasets
data()
## The package argument of data() must be a character vector.
data( package = "MASS" )
data( package = .packages( all.available = TRUE ) )
library( package = MASS )
?bacteria
Searching
## Perform a local search:
help.search( "as.data.frame" )

## Perform an on-line search of R manuals, help pages, and
## mailing list archives:
RSiteSearch( "svg" )
Vignettes
Vignettes are PDF-based documentation included in packages.
## Get a list of vignettes that are available in loaded packages:
vignette()

## Get a list of vignettes from a single package:
vignette( package = "grid" )

## View specific vignettes.
vignette( topic = "grid" )
vignette( topic = "rotated", package = "grid" )

## Extract the code examples from a vignette into an editor.
edit( vignette( topic = "grid" ) )
Factors
## Functions for creating and inspecting factors.
factor()
as.factor()
is.factor()
ordered()
as.ordered()
is.ordered()
levels()
nlevels()
relevel()
gl()    ## generate factor levels
cut()   ## convert numeric to factor

## split() divide into groups
example( split )   ## study the examples carefully

## Split random values into m groups and draw one boxplot per group,
## ordered by decreasing group median.
n <- 1000
m <- 49
data <- data.frame( x = rnorm( n ),
                    y = trunc( runif( n, min = 1, max = m + 1 ) ) )
splits <- split( x = data$x, f = data$y )
## vapply() guarantees one numeric median per group.
o <- order( vapply( splits, median, numeric( 1 ) ), decreasing = TRUE )
boxplot( splits[ o ], horizontal = TRUE )

help( "[.factor" )
help( Extract )
Files
## Set the working directory, confirm it, list its files, and run a
## script from it. The path below is specific to the author's machine.
setwd( "/Volumes/CHHalling/R" )
getwd()
list.files()
source( file = "paletteDemo.R", echo = TRUE )
File and directory manipulation
## This section is incomplete.
?files
file.create()
file.exists()
file.remove()
file.rename()
file.append()
file.copy()
file.symlink()
dir.create()

## Get and set the working directory.
getwd()
setwd()

## List the files in a directory.
?list.files
list.files()
dir()   ## Equivalent to list.files()

## Construct path to a file.
## file.path() joins its separate arguments; a single character vector
## is returned unchanged, so spread the elements with do.call().
?file.path
pathElements <- c( "Users", "challing", "myScript.R" )
filePath <- do.call( what = file.path, args = as.list( pathElements ) )
## filePath == "Users/challing/myScript.R"

## File names used by the examples below.
fileNames <- c( "/Users/challing/myScript.R",
                "/Users/challing/anotherScript.R" )

## Ascertain file access
?file.access
file.access( fileNames, mode = 0 )   ## mode 0 tests for existence
file.exists( fileNames )

## Extract file information
?file.info
fileInfo <- file.info( fileNames )

## Display files
?file.show
file.show( fileNames )
Data files
R data files
R history
R profile
R scripts
Functions
## Get help for a function:
help( plot )

## Get a function's arguments
args( plot )

## View the code for a function:
plot

## View examples of how to use a function:
example( plot )
Graphics
Colors
## Functions for specifying and converting colors.
col2rgb()
colors()
hcl()
hsv()
palette()
rgb()
rgb2hsv()
## See also packages colorspace and RColorBrewer.
## Create vectors of n contiguous colors:
gray( 0:8 / 8 )
rainbow( 12 )
heat.colors( 12 )
terrain.colors( 12 )
topo.colors( 12 )
cm.colors( 12 )
example( rainbow )
## Functions for plotting and modifying plots.
abline()
arrows()
axis()
bquote()
box()
dQuote()        ## the function is spelled dQuote(), not dquote()
expression()    ## See ?plotmath and demo( plotmath )
jitter()
legend()

## Placing two different items in a legend
## (posted to the R-help list by Arne Henningsen):
barplot(c(1:10))
lines(c(1:11),5*abs(sin(c(1:11))))
legend( 1.25, 10, c(" bar"), fill="grey", bty="n" )
legend( 1, 9.5, c("line"), lty=1, bty="n" )
rect(1,8.7,3,10)

lines()
matlines()
mtext()
plot()
points()
polygon()
rect()
rug()
segments()
sQuote()        ## the function is spelled sQuote(), not squote()
substitute()
text()
title()
thigmophobe()          ## in package plotrix
thigmophobe.labels()   ## in package plotrix
barplot()
## Nice examples of barplots.
## Prompt before each figure, then restore the previous setting.
op <- par( ask = TRUE )
example( barplot )
par( op )

## Barplot with rotated labels.
## This example was posted to the R-help list by Marc Schwartz.

## Create default device.
dev.new()

## Increase bottom margin to make room for rotated labels
par( mar = c( 7, 4, 4, 2 ) + 0.1 )

## Create plot and get bar midpoints in 'mp'
mp <- barplot( 1:10 )

## Set up x axis with tick marks alone
axis( 1, at = mp, labels = FALSE )

## Create some text labels
labels <- paste( "Label", 1:10, sep = " " )

## Plot x axis labels at mp
text( mp, par( "usr" )[ 3 ] - 0.5, srt = 45, adj = 1,
      labels = labels, xpd = TRUE )

## Plot x axis label at line 4
mtext( side = 1, text = "X Axis Label", line = 4 )

## This example was posted to the R-help list by
## Francisco J. Zagmutt.

## Create a table to make a barplot.
tab <- table( rpois( 100, 2 ) )

## Prepare the margin to fit the x axis labels.
par( mar = c( 6, 4, 4, 2 ) + 0.1 )

## Barplot of tab without x axis or label.
## Also stores the middle points of the bars.
pts <- barplot( tab, xaxt = "n", xlab = "", col = "yellow2" )

## Create the x axis with tick marks exactly at the middle of the bars.
axis( side = 1, at = pts, labels = FALSE, tick = TRUE )

## Names of each category, to be used as labels.
nam <- paste( "Text", names( tab ) )

## Add the tick mark labels.
## adj = 1 will place the end of the text at the tick marks.
## xpd = TRUE allows the text to be drawn outside the plot region
## (plotting is clipped to the figure region instead).
text( pts, par( "usr" )[ 3 ] - 1.5, srt = 45, adj = 1,
      labels = nam, xpd = TRUE )

## Add the x axis label at line 4.
mtext( side = 1, text = "My Axis Label", line = 4 )

## Another nice barplot example, posted to the R-help list by Martin
## Maechler.
## dev.new() passes the size on to the default device, so this is not
## limited to the Mac-only quartz() device.
dev.new( height = 4, width = 6 )
barplot( VADeaths, beside = TRUE,
         col = c( "lightblue", "mistyrose", "lightcyan",
                  "lavender", "cornsilk" ),
         legend = rownames( VADeaths ), ylim = c( 0, 100 ) )
title( main = "Death Rates in Virginia", font.main = 4 )
barplot2()
## Package gplots requires packages gdata and gtools.
library( package = gplots )
## Prompt before each figure, then restore the previous setting.
op <- par( ask = TRUE )
example( barplot2 )
par( op )
boxplot()
## From example( boxplot ):
boxplot( count ~ spray, data = InsectSprays, col = "lightgray" )
contour()
curve()
dotchart()
heatmap()
image()
## Draw a 10 x 10 matrix of random values as an image and label the
## cells along the diagonal.
m <- matrix( rnorm( n = 100, mean = 0, sd = 1 ), ncol = 10 )
image( m )
text( 0:9/9, 0:9/9, 0:9 )

## Another example, posted to the R-help list by Roger Bivand.
## Compare these two image figures:
dev.new()
image( matrix( rnorm( 200 ), 10, 20 ) )

## Draw the image without axes.
dev.new()
image( matrix( rnorm( 200 ), 10, 20 ), axes = FALSE )

## Create the axes and axis labels and draw them.
axis( 1, at = seq( 0, 1, length.out = 10 ), labels = LETTERS[ 1:10 ] )
axis( 2, at = seq( 0, 1, length.out = 20 ), labels = letters[ 1:20 ] )
box()
interaction.plot()
matplot()
par()
## Make R prompt before plotting each figure.
## par() returns the previous settings, which are restored afterwards.
op <- par( ask = TRUE )
example( persp )
par( op )

## View current plotting parameters:
par()
par()$ask
persp()
## persp.r
## 05-Feb-2006
##
## Conrad Halling
## conrad.halling@sphaerula.com

## Create a bivariate normal distribution
x <- rnorm( n = 10000, mean = 10, sd = 4 )
y <- rnorm( n = 10000, mean = 10, sd = 4 )

## The z value is the density of values in the x-y plane.
## Points are rounded to the nearest integer in 0..20; the + 1 converts
## that value to a 1-based matrix index. Points outside [0, 20] in
## either coordinate are ignored.
z <- matrix( data = 0, nrow = 21, ncol = 21 )
for ( i in seq_along( x ) )
{
    if ( ( x[ i ] >= 0 ) && ( x[ i ] <= 20 ) &&
         ( y[ i ] >= 0 ) && ( y[ i ] <= 20 ) )
    {
        xx <- round( x[ i ] )
        yy <- round( y[ i ] )
        z[ xx + 1, yy + 1 ] <- z[ xx + 1, yy + 1 ] + 1
    }
}

## Display the density of values with a perspective plot.
## Each pass redraws the surface with the viewing angle theta advanced
## by 2 degrees, which rotates the plot through a full turn.
for ( i in seq( from = 0, to = 360, by = 2 ) )
{
    persp( x = 0:20, y = 0:20, z = z,
           theta = i, phi = 30,
           xlab = "x", ylab = "y",
           col = "lightblue",
           ltheta = -135, shade = 0.5,
           ticktype = "detailed" )
}
stars()
stripchart()
sunflowerplot()
symbols()
Numbers
Read R FAQ 7.31 ("Why doesn't R think these numbers are equal?") before testing floating point numbers for equality; compare them with all.equal() rather than ==.
Formatting
## Set the default number of digits for the display of
## floating point numbers:
options( digits = 3 )

## Format numbers for output:
format()
formatC()
prettyNum()
sprintf()
NA
## Indicate missing data with NA
y <- NA

## Test for NA with is.na(), not with ==
if ( is.na( y ) ) {
    ## ...
}

## is.na returns TRUE for NaN
y <- 0 / 0
if ( is.na( y ) ) {
    ## ...
}

## is.nan returns TRUE only for NaN, not NA
if ( is.nan( y ) ) {
    ## ...
}

## Set elements of a vector to NA
x <- 1:4
is.na( x ) <- c( 2, 4 )   ## x == c( 1, NA, 3, NA )
v <- c( "red", "orange", "blue", "green" )
is.na( v ) <- c( 2, 4 )   ## v == c( "red", NA, "blue", NA )

## Set a string to NA
w <- as.character( NA )

## Other functions (listed for reference; each needs arguments):
## complete.cases(), na.omit(), na.fail(), na.exclude(), na.pass()
Rounding
## These functions work on vectors, too.
x <- rnorm( 1 )

## ceiling(): find the smallest integer not less than x.
x.ceiling <- ceiling( x )
print( paste0( "ceiling: ", paste( x, x.ceiling, sep = " => " ) ) )

## floor(): find the largest integer not greater than x.
x.floor <- floor( x )
print( paste0( "floor: ", paste( x, x.floor, sep = " => " ) ) )

## trunc(): form an integer by truncating x towards 0.
x.trunc <- trunc( x )
print( paste0( "trunc: ", paste( x, x.trunc, sep = " => " ) ) )

## round(): round to the specified number of decimal places.
x.round <- round( x, digits = 4 )
print( paste0( "round: ", paste( x, x.round, sep = " => " ) ) )

## signif(): round x to the specified number of significant digits
x.signif <- signif( x, digits = 4 )
print( paste0( "signif: ", paste( x, x.signif, sep = " => " ) ) )

## Note: signif( -0.0345028, digits = 4 ) => [1] -0.0345
## when it should be -0.03450; the final 0 is dropped during printing
## See http://finzi.psych.upenn.edu/R/Rhelp02a/archive/19953.html
## See http://finzi.psych.upenn.edu/R/Rhelp02a/archive/56966.html

## See also zapsmall(), which needs a numeric argument.
Objects
## Get a search path for objects, a list of attached packages, data
## files, and attached objects (usually data.frames):
search()

## List objects in the user environment:
ls()
objects()

## List objects in an attached package:
ls( name = "package:methods" )
ls( name = "package:methods", all.names = TRUE )
ls( name = search()[ 4 ] )

## Remove an object from the user environment.
## rm() and remove() are synonyms; x is recreated between the two calls
## because removing an object that no longer exists gives a warning.
x <- 1:10
rm( x )
x <- 1:10
remove( x )

## Remove all objects from the user environment:
rm( list = ls() )

## Find conflicts in object names:
conflicts( detail = TRUE )

## Remove objects from the user workspace that mask other objects:
rm( list = conflicts( detail = TRUE )$.GlobalEnv )

## Return a list of objects containing "mat" in their names
## (e.g., "as.matrix", "subset.matrix"):
apropos( what = "mat" )

## Find the location of a specific object.
## Here, returns "package:base".
find( what = "subset.matrix" )   ## returns "package:base"

## Find the locations of objects containing "mat" in their names:
find( what = "mat", simple.words = FALSE )

## Return the value of a named object.
get()

## Access exported and internal variables in a name space.
?"::"
base::log
base::"+"

## Save specific objects to an R data file.
x <- rnorm( 10 )
y <- rpois( 10, 2 )
save( x, y, file = "my.RData" )

## Save all user objects to the file .RData, which will be loaded
## automatically when R is restarted from the working directory.
save.image()
save.image( file = "~/.RData" )
## Spell out all.names rather than relying on partial argument matching.
save( list = ls( all.names = TRUE ), file = ".RData" )

## Reload a data set saved with the function save().
## A file named .RData, located in the directory from which R is
## started, is loaded automatically on startup.
load( file = "my.RData" )
load( file = "~/.RData" )

## Load a data set via a URL connection.
con <- url( "http://www.maths.anu.edu.au/~johnm/r/dsets/usingR.RData" )
load( con )
close( con )

## Attach a data set saved with the function save().
attach( what = "my.RData" )

## Detach an attached file.
detach( name = "file:my.RData" )
Packages
## Use the HTML help to get information about packages. When the browser
## window opens, click on the link for "Packages":
help.start()

## Learn about an individual package:
library( help = MASS )
help( package = MASS )
packageDescription( pkg = "MASS" )

## Load a package from the command line:
library( package = MASS )

## Load a package from within a function.
## require() returns FALSE instead of stopping when the package is
## missing, so check its return value.
require( package = MASS )

## Detach a package:
detach( name = package:MASS )

## Get a list of loaded packages and attached objects:
search()

## Get a list of installed packages:
library()

## Get more information about installed packages, returned in a matrix:
installed.packages()

## Get information about a specific package:
installed.packages()[ "MASS", ]

## Get information about available packages:
available.packages()

## Get a character vector containing the names of installed packages:
.packages( all.available = TRUE )

## Get information about data in packages that have been loaded:
data()

## Get information about data in installed packages:
data( package = "MASS" )
data( package = .packages( all.available = TRUE ) )
fortunes
## Just for fun:
library( package = fortunes )
fortune()
To view an R fortune every time you start R, create a file named .Rprofile
in your home directory, with the following contents:
## Show a fortune at the start of each interactive session.
if ( interactive() ) {
    ## require() returns FALSE (with a warning) when the package is not
    ## installed; only call fortune() when the package actually loaded,
    ## so that a missing package does not cause an error at startup.
    if ( require( package = fortunes, quietly = TRUE ) ) {
        print( fortune() )
    }
}
See ?.Rprofile
for more information.
Platform, machine, and version characteristics
## Get platform characteristics as a list:
.Platform

## Get machine characteristics as a list; these include the numerical
## characteristics and the sizes of basic variable types:
.Machine

## Get R version information:
R.version          ## a simple.list variable
version            ## identical to R.version, for S-Plus compatibility
R.version.string   ## a character string variable, e.g.
                   ## "R version 2.2.1, 2005-12-20"
R.Version()        ## a function that returns the contents of R.version
                   ## as a list
getRversion()      ## a function that returns the version of R
                   ## e.g., "2.2.1"

## Get system information:
Sys.info()         ## get system and user information from system calls

## Get session information:
sessionInfo()      ## returns R version, platform, attached packages
Stats
## Commonly used statistics functions.
anova()
aov()
C()
cor.test()
glm()
lm()
t.test()
Correlation, variance, and covariance
## Help for cor(), var(), cov(), and cov2cor() functions:
help( cor )

## Simple examples:
x <- rnorm( n = 1000, mean = 0, sd = 1 )
y <- rnorm( n = 1000, mean = 0, sd = 1 )
cov( x = x, y = y, method = "pearson" )
cor( x = x, y = y, method = "pearson" )   ## also "kendall", "spearman"

## Testing the significance of correlation:
cor.test( x = x, y = y, alternative = "two.sided", method = "pearson",
          conf.level = 0.95 )

## For more than two variables, a matrix or data.frame is required:
z <- rnorm( n = 1000, mean = 0, sd = 1 )
w <- data.frame( x = x, y = y, z = z )
cov( x = w, method = "pearson" )
cor( x = w, method = "pearson" )

## Testing the significance of correlation:
## These must be pairwise.
cor.test( ~ x + y, data = w, method = "pearson" )

## Convert a covariance matrix to a correlation matrix:
w.cov <- cov( x = w, method = "pearson" )
w.cor <- cov2cor( w.cov )
density()
## A basic example:
## Estimate the densities of two uniform samples and overlay them.
x1 <- runif( 100, 10, 80 )
x2 <- runif( 100, 1, 100 )
d1 <- density( x1, kernel = "gaussian", width = 20 )
plot( d1, type = "l", xlim = c( 0, 100 ), ylim = c( 0.0, 0.1 ),
      col = "red", main = "" )
d2 <- density( x2, kernel = "gaussian", width = 20 )
lines( d2, col = "blue" )
Linear Models
Contrasts
## Examples of contrast types:
contr.helmert( n = 3 )
contr.poly( n = 3 )
contr.sum( n = 3 )
contr.treatment( n = 3, base = 1 )
contr.SAS( n = 3 )

## Set default contrast types for unordered and ordered factors:
options( contrasts = c( "contr.treatment", "contr.poly" ) )
Model Matrix (Design Matrix)
## A formula and a data set are required to build a model matrix.
## library() stops with an error if the package cannot be loaded,
## whereas require() only returns FALSE.
library( datasets )
ff <- log( Volume ) ~ log( Height ) + log( Girth )
mm <- model.matrix( object = ff, data = trees )
Multiple Comparisons
## Functions for multiple comparisons.
p.adjust()
pairwise.t.test()
pairwise.prop.test()
pairwise.wilcox.test()
pairwise.table()
Strings
## Functions for creating, formatting, and printing strings.
as.character()
bquote()
cat()
encodeString()
format()
formatC()
gettext()
gettextf()
paste()
print()
quote()
sprintf()
substitute()
text()
toString()
Subsetting
## Subset based on logical relationships:
x <- c( 4, 7, 12, 19, 6 )
y <- x[ x < 12 ]           ## y == c( 4, 7, 6 )

## Subset based on indexes.
## Positive indexes select elements; negative indexes drop them.
z <- x[ c( 2, 4 ) ]        ## z == c( 7, 19 )
w <- x[ -c( 2, 4 ) ]       ## w == c( 4, 12, 6 )