Basic data objects
Vectors
## Create numeric, character, and logical vectors:
weight <- c( 64, 78, 66, 54 )
name <- c( "Clarence", "Robert", "Elizabeth", "Charlene" )
heavy <- weight > 70    ## element-wise comparison gives a logical vector
print( heavy )

## Indicate missing data with NA:
height <- c( 62, NA, 65, 72 )

## Functions and operators for creating vectors:
c( 4.17, 4.18, 4.20, 4.16, 4.17 )
print( 1:10 )
seq( from = 4, to = 8, by = 0.5 )
## Spell out length.out rather than relying on partial argument matching.
seq( from = -10, to = 10, length.out = 51 )

## Create a vector containing
## 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
rep( x = 1:2, times = c( 10, 15 ) )

## Create a vector containing
## 1 2 1 2 1 2 1 2
rep( x = 1:2, times = 4 )

## Create a vector containing
## 1 1 1 1 2 2 2 2
rep( x = 1:2, each = 4 )
c( rep( 1, 4 ), rep( 2, 4 ) )

## Built-in vectors:
LETTERS
letters
month.name
month.abb
Matrices
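## The names() function appears to behave differently for a matrix
## object than for a data.frame object, but in both cases it simply gets
## or sets the names attribute. A data.frame is a list, so its names
## attribute holds the column names. A matrix normally has no names
## attribute; its row and column names are stored in the dimnames
## attribute (see rownames(), colnames(), and dimnames()).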
Arrays
Lists
Data.frames
## Create a data.frame using artificial data: 20 observations of three
## variables drawn from normal, Poisson, and t distributions.
ad <- data.frame(
    x = rnorm( n = 20 ),
    y = rpois( n = 20, lambda = 2 ),
    z = rt( n = 20, df = 12 ) )
## Name the rows "a", "b", ... seq_len( nrow( ad ) ) is empty for a
## data.frame with no rows, where 1:length( ad$x ) would give c( 1, 0 ).
row.names( ad ) <- letters[ seq_len( nrow( ad ) ) ]
## Get the names of the rows (the observation names)
## as a character vector.
row.names( x = ad )
rownames( x = ad )
## Get the names of the columns (the variable names)
## as a character vector.
names( x = ad )
colnames( x = ad )
## Get an individual column from a data.frame as a vector.
## These all return the same result, a vector.
ad$x ## All rows, column with name "x"
ad[ , "x" ] ## All rows, column with name "x"
ad[ , 1 ] ## All rows, column 1
ad[[ 1 ]] ## Get component 1 from the data.frame
## Get an individual row from a data.frame as a data.frame.
ad[ 2, ] ## Returns an object of class data.frame
## Get an individual row from a data.frame as a vector.
## unlist() coerces the values to a common type, so if any column is
## of type character, the returned vector is a character vector.
unlist( ad[ 2, ] ) ## Returns a vector
## Create a new data.frame using specified columns of an existing
## data.frame.
ad2 <- ad[ c( 1, 3 ) ]
## Get a summary of a data.frame.
summary( object = ad )
## Apply a function to each column of a data.frame.
## Returns a list the same length as X.
lapply( X = ad, FUN = mean ) ## returns a list
lapply( X = ad, FUN = quantile, probs = 1:3/4 ) ## returns a list
## Apply a function to each column of a data.frame.
## Returns a vector or a matrix when possible.
sapply( X = ad, FUN = mean ) ## returns a vector
sapply( X = ad, FUN = quantile, probs = 1:3/4 ) ## returns a matrix
## This example, from Data Analysis and Graphics Using R, by
## John Maindonald and John Braun, p. 15, calculates growth
## rates of Australian populations from 1917 to 1997.
library( package = DAAG )
sapply(
X = austpop[ -c( 1, 10 ) ],
FUN = function( x ) { ( x[ 9 ] - x[ 1 ] ) / x[ 1 ] } )
## Plotting using sapply(). This example is from the solutions to
## Data Analysis and Graphics Using R, by John Maindonald and John Braun,
## p. 18, problem 10. invisible() hides the list returned by sapply, which
## contains NULL components. lapply() can be used here instead of sapply().
library( package = DAAG )
## dev.new() opens the default device whether the "device" option holds a
## device function or the name of one.
dev.new()
## Lay the eight plots out in a 2 x 4 grid, remembering the old settings.
oldpar <- par( mfrow = c( 2, 4 ) )
invisible(
    sapply(
        X = 2:9,
        FUN = function( i, df )
        {
            ## Column 1 is plotted on the x axis; column i is plotted on
            ## a log scale and labelled with its own name.
            plot(
                x = df[ , 1 ],
                y = log( df[ , i ] ),
                xlab = "Year",
                ylab = names( df )[ i ],
                pch = 16,
                ylim = c( 0, 10 ) )
        },
        df = austpop ) )
## Restore the plotting parameters that were in effect before the example.
par( oldpar )
## Identify rows with NA values.
library( package = DAAG )
possum[ !complete.cases( possum ), ] ## 3 rows returned
## Create a new data.frame, omitting rows containing NA.
dim( possum ) ## 104 rows
newpossum <- na.omit( possum )
dim( newpossum ) ## 101 rows
## Functions for manipulating vectors, matrices, lists, and data.frames
cbind()
rbind()
aggregate()
data.frame()
data.matrix()
list()
matrix()
merge()
Viewing and Editing
## Return the first or last part of a vector, matrix, data.frame, or ## function. head( CO2 ) tail( CO2 ) str( CO2 ) page( CO2 ) file.show() fix( CO2 ) edit( CO2 ) ## More useful for editing functions
Control flow
## Get help on if, for, while, repeat, break, and next
?"if"
## Loop using for.
## next skips the rest of the current iteration; break leaves the loop.
## Prints 1, 3, 4, 5.
for ( i in 1:10 ) {
if ( i == 2 ) next
if ( i == 6 ) break
print( i )
}
## Loop using while.
## i is incremented before the tests, so 1 is never printed.
## Prints 3, 4, 5; the break at i == 6 ends the loop before the
## condition i <= limit becomes FALSE.
limit <- 10
i <- 1
while ( i <= limit ) {
i <- i + 1
if ( i == 2 ) next
if ( i == 6 ) break
print( i )
}
## Loop using repeat.
## repeat has no condition of its own; break is the only way out.
## Prints 3, 4, 5.
i <- 1
repeat {
i <- i + 1
if ( i == 2 ) next
if ( i == 6 ) break
print( i )
}
switch()
Data sets
Files
See objects.
Packages
## Get information about data in packages that have been loaded:
data()
## Get information about data in installed packages.
## The package argument is a character vector, so the name must be quoted.
data( package = "MASS" )
data( package = .packages( all.available = TRUE ) ) ## all installed packages
Viewing
## Return the first or last part of a vector, matrix, data.frame, or ## function. head( CO2 ) tail( CO2 )
Demonstrations
## Get lists of available demonstrations: demo() demo( package = .packages( all.available = TRUE ) ) ## Run some demonstrations: demo( graphics ) demo( persp )
Devices
## Open a default screen device for any platform
get( getOption( "device" ) )()
## Device commands:
dev.cur()
dev.list()
dev.next()
dev.prev()
dev.off()
dev.set()
graphics.off()
capabilities()
## Copy a plot to a PNG file.
## X11 must be running in order to create the PNG file.
## The default name of the file is Rplot001.png, which is created
## in R's working directory.
get( getOption( "device" ) )()
plot( y = rnorm( 10 ), x = rnorm( 10 ), main = "Example Plot" )
dev.copy(
device = png,
width = 7 * 72,
height = 7 * 72 )
dev.off()
## Copy a plot to a PNG file. The image quality is not as good as that
## produced by dev.copy() (see above).
## X11 must be running.
## ghostscript must be installed; its path can be designated in the
## environment variable R_GSCMD.
## dev.new() opens the default device whether the "device" option holds a
## device function or the name of one.
dev.new()
plot( y = rnorm( 10 ), x = rnorm( 10 ), main = "Example Plot" )
## Sys.setenv() replaces Sys.putenv(), which is defunct in current R.
Sys.setenv( R_GSCMD = "/sw/bin/gs" )
## height and width are in inches (the default units of dev2bitmap()).
dev2bitmap(
    file = "example.png",
    type = "png256",
    height = 7,
    width = 7 )
dev.print()
dev.copy2eps()
dev.control()
## A dev.new() function that opens a new default device.
## R 2.8.0 and later provide dev.new() in package grDevices, so the
## fallback below is defined only when no such function exists; this
## avoids masking the standard one.
if ( !exists( "dev.new", mode = "function" ) ) {
    dev.new <- function() {
        dev <- getOption( "device" )
        ## The option may hold a device function or the name of one.
        if ( is.character( dev ) ) dev <- get( dev )
        dev()
    }
}
dev.new()
Documentation and help
Documentation
## View locally installed documentation: help.start()
Examples
## Run the example code for function persp: example( persp ) ## Make R prompt before plotting each figure: op <- par( ask = TRUE ) example( persp ) par( op )
Help
## help() and ? are synonymous
help( help )
help( plot )
?plot
## The help pages give example code at the bottom.
## Use example() to run the example code.
example( plot )
## help( function ) gives "Error: syntax error"
## Must put function into quotes
help( "function" )
## Special symbols must be quoted to get help
help( "[[" )
?"[["
help( "<-" )
## Help for packages
library( help = MASS )
help( package = MASS )
package?MASS
## Help for datasets.
## The package argument of data() is a character vector, so the package
## name must be quoted.
data()
data( package = "MASS" )
data( package = .packages( all.available = TRUE ) )
library( package = MASS )
?bacteria
Searching
## Perform a local search: help.search( "as.data.frame" ) ## Perform an on-line search of R manuals, help pages, and ## mailing list archives: RSiteSearch( "svg" )
Vignettes
Vignettes are PDF-based documentation included in packages.
## Get a list of vignettes that are available in loaded packages: vignette() ## Get a list of vignettes from a single package: vignette( package = "grid" ) ## View specific vignettes. vignette( topic = "grid" ) vignette( topic = "rotated", package = "grid" ) ## Extract the code examples from a vignette into an editor. edit( vignette( topic = "grid" ) )
Factors
factor()
as.factor()
is.factor()
ordered()
as.ordered()
is.ordered()
levels()
nlevels()
relevel()
gl() ##generate factor levels
cut() ##convert numeric to factor
## split() divide into groups
example( split ) ##study the examples carefully
## Example: box plots of x grouped by y, ordered by group median.
## n is the number of observations; m is the number of groups.
n <- 1000
m <- 49
## x holds n normal random values; y holds group labels obtained by
## truncating uniform random values between 1 and m + 1 to whole numbers.
data <-
data.frame(
x = rnorm( n ),
y = trunc( runif( n, min = 1, max = m + 1 ) ) )
## One vector of x values for each distinct value of y.
splits <- split( x = data$x, f = data$y )
## Positions of the groups sorted by decreasing median.
o <- order( sapply( splits, median ), decreasing = TRUE )
boxplot( splits[ o ], horizontal = TRUE )
help( "[.factor" )
help( Extract )
Files
setwd( "/Volumes/CHHalling/R" ) getwd() list.files() source( file = "paletteDemo.R", echo = TRUE )
File and directory manipulation
## This section is incomplete.
?files
file.create()
file.exists()
file.remove()
file.rename()
file.append()
file.copy()
file.symlink()
dir.create()

## Get and set the working directory.
getwd()
setwd()

## List the files in a directory.
?list.files
list.files()
dir() ## Equivalent to list.files()

## Construct path to a file.
## file.path() joins its separate arguments; given a single vector it
## returns the vector unchanged, so pass the elements as arguments.
?file.path
pathElements <- c( "Users", "challing", "myScript.R" )
filePath <- do.call( file.path, as.list( pathElements ) )

## The examples below operate on these files.
fileNames <-
    c( "/Users/challing/myScript.R",
       "/Users/challing/anotherScript.R" )

## Ascertain file access
?file.access
file.access( fileNames, mode = 0 ) ## mode 0 tests for existence
file.exists( fileNames )

## Extract file information
?file.info
fileInfo <- file.info( fileNames )

## Display files
?file.show
file.show( fileNames )
Data files
R data files
R history
R profile
R scripts
Functions
## Get help for a function: help( plot ) ## Get a function's arguments args( plot ) ## View the code for a function: plot ## View examples of how to use a function: example( plot )
Graphics
Colors
col2rgb() colors() hcl() hsv() palette() rgb() rgb2hsv() ## See also packages colorspace and RColorBrewer.
## Create vectors of n contiguous colors: gray( 0:8 / 8 ) rainbow( 12 ) heat.colors( 12 ) terrain.colors( 12 ) topo.colors( 12 ) cm.colors( 12 ) example( rainbow )
## Functions for plotting and modifying plots.
abline()
arrows()
axis()
bquote()
box()
dquote()
expression() ## See ?plotmath and demo( plotmath )
jitter()
legend()
## Placing two different items in a legend
## (posted to the R-help list by Arne Henningsen):
barplot(c(1:10))
lines(c(1:11),5*abs(sin(c(1:11))))
legend( 1.25, 10, c(" bar"), fill="grey", bty="n" )
legend( 1, 9.5, c("line"), lty=1, bty="n" )
rect(1,8.7,3,10)
lines()
matlines()
mtext()
plot()
points()
polygon()
rect()
rug()
segments()
squote()
substitute()
text()
title()
thigmophobe() ## in package plotrix
thigmophobe.labels() ## in package plotrix
barplot()
## Nice examples of barplots:
par( ask = TRUE )
example( barplot )
## Barplot with rotated labels.
## This example was posted to the R-help list by Marc Schwartz.
## Create default device.
get( getOption( "device" ) )()
## Increase bottom margin to make room for rotated labels
par( mar = c( 7, 4, 4, 2 ) + 0.1 )
## Create plot and get bar midpoints in 'mp'
mp <- barplot( 1:10 )
## Set up x axis with tick marks alone
axis( 1, at = mp, labels = FALSE )
## Create some text labels
labels <- paste( "Label", 1:10, sep = " " )
## Plot x axis labels at mp
text( mp, par( "usr" )[ 3 ] - 0.5, srt = 45, adj = 1,
labels = labels, xpd = TRUE )
## Plot x axis label at line 4
mtext( 1, text = "X Axis Label", line = 4 )
## This example was posted to the R-help list by
## Francisco J. Zagmutt.
## Create a table to make a barplot.
tab <- table( rpois( 100, 2 ) )
## Prepare the margin to fit the x axis labels.
par( mar = c( 6, 4, 4, 2 ) + 0.1 )
## Barplot of tab without x axis or label.
## Also stores the middle points of the bars.
pts <- barplot( tab, xaxt = "n", xlab = "", col = "yellow2" )
## Create the x axis with tick marks exactly at the middle of the bars.
axis( side = 1, at = pts, labels = FALSE, tick = TRUE )
## Names of each category, to be used as labels.
nam <- paste( "Text", names( tab ) )
## Add the tick mark labels.
## adj = 1 places the end of the text at the tick marks.
## xpd = TRUE allows the text to be drawn outside the plot region
## (it is clipped to the figure region instead).
text( pts, par( "usr" )[ 3 ] - 1.5, srt = 45, adj = 1, labels = nam,
      xpd = TRUE )
## Add the x axis label at margin line 4.
mtext( side = 1, text = "My Axis Label", line = 4 )
## Another nice barplot example, posted to the R-help list by Martin
## Maechler.
quartz( height = 4, width = 6 )
barplot(
VADeaths,
beside = TRUE,
col = c( "lightblue", "mistyrose", "lightcyan",
"lavender", "cornsilk" ),
legend = rownames( VADeaths ),
ylim = c( 0, 100 ) )
title( main = "Death Rates in Virginia", font.main = 4 )
barplot2()
## Package gplots requires packages gdata and gtools. library( package = gplots) par( ask = TRUE ) example( barplot2 )
boxplot()
## From example( boxplot ): boxplot( count ~ spray, data = InsectSprays, col = "lightgray" )
contour()
curve()
dotchart()
heatmap()
image()
m <- matrix( rnorm( n = 100, mean = 0, sd = 1 ), ncol=10 ) image( m ) text( 0:9/9, 0:9/9, 0:9 ) ## Another example, posted to the R-help list by Roger Bivand. ## Compare these two image figures: get( getOption( "device" ) )() image( matrix( rnorm( 200 ), 10, 20 ) ) ## Draw the image without axes. get( getOption( "device" ) )() image( matrix( rnorm( 200 ), 10, 20 ), axes = FALSE ) ## Create the axes and axis labels and draw them. axis( 1, at = seq( 0, 1, length.out = 10 ), labels = LETTERS[ 1:10 ] ) axis( 2, at = seq( 0, 1, length.out = 20 ), labels = letters[ 1:20 ] ) box()
interaction.plot()
matplot()
par()
## Make R prompt before plotting each figure: op <- par( ask = TRUE ) example( persp ) par( op ) ## View current plotting parameters: par() par()$ask
persp()
## persp.r
## 05-Feb-2006
#
## Conrad Halling
## conrad.halling@sphaerula.com
## Create a bivariate normal distribution
x <- rnorm( n = 10000, mean = 10, sd = 4 )
y <- rnorm( n = 10000, mean = 10, sd = 4 )
## The z value is the density of values in the x-y plane:
## z[ i, j ] counts the points whose rounded coordinates are
## ( i - 1, j - 1 ) on the integer grid 0..20. Points with either
## coordinate outside [0, 20] are not counted.
inside <- x >= 0 & x <= 20 & y >= 0 & y <= 20
gx <- round( x[ inside ] )
gy <- round( y[ inside ] )
## Position of cell [ gx + 1, gy + 1 ] in a 21 x 21 matrix stored
## column by column.
cell <- gx + 1 + 21 * gy
## tabulate() counts the points in every cell in one pass; as.numeric()
## keeps z a matrix of doubles.
z <- matrix(
    data = as.numeric( tabulate( cell, nbins = 21 * 21 ) ),
    nrow = 21,
    ncol = 21 )
## Display the density of values with a perspective plot, redrawn once
## for each viewing angle theta = 0, 2, 4, ..., 360 degrees so that the
## surface appears to rotate.
for ( angle in 2 * ( 0:180 ) )
{
    persp(
        x = 0:20, y = 0:20, z = z,
        theta = angle, phi = 30,
        xlab = "x", ylab = "y",
        col = "lightblue",
        ltheta = -135, shade = 0.5,
        ticktype = "detailed" )
}
stars()
stripchart()
sunflowerplot()
symbols()
Numbers
Read FAQ 7.31 about testing floating point numbers for equality.
Formatting
## Set the default number of digits for the display of ## floating point numbers: options( digits = 3 ) ## Format numbers for output: format() formatC() prettyNum() sprintf()
NA
## Indicate missing data with NA
y <- NA
## Test for NA with is.na(), not with ==
if ( is.na( y ) ) { ...
## is.na returns TRUE for NaN
y <- 0 / 0
if ( is.na( y ) ) { ...
## is.nan returns TRUE only for NaN, not NA
if ( is.nan( y ) ) { ...
## Set elements of a vector to NA
x <- 1:4
is.na( x ) <- c( 2, 4 ) ## x == c( 1, NA, 3, NA )
v <- c( "red", "orange", "blue", "green" )
is.na( v ) <- c( 2, 4 ) ## v == c( "red", NA, "blue", NA )
## Set a string to NA
w <- as.character( NA )
## Other functions:
complete.cases()
na.omit()
na.fail()
na.exclude()
na.pass()
Rounding
## Each rounding function below also accepts a vector; a single random
## value is used here.
x <- rnorm( 1 )
## ceiling(): the smallest integer that is >= x.
x.ceiling <- ceiling( x )
print( paste( "ceiling: ", x, " => ", x.ceiling, sep = "" ) )
## floor(): the largest integer that is <= x.
x.floor <- floor( x )
print( paste( "floor: ", x, " => ", x.floor, sep = "" ) )
## trunc(): drop the fractional part of x (truncate towards 0).
x.trunc <- trunc( x )
print( paste( "trunc: ", x, " => ", x.trunc, sep = "" ) )
## round(): round x to the given number of decimal places.
x.round <- round( x, digits = 4 )
print( paste( "round: ", x, " => ", x.round, sep = "" ) )
## signif(): round x to the given number of significant digits.
x.signif <- signif( x, digits = 4 )
print( paste( "signif: ", x, " => ", x.signif, sep = "" ) )
## Note: signif( -0.0345028, digits = 4 ) => [1] -0.0345
## when it should be -0.03450; the final 0 is dropped during printing
## See http://finzi.psych.upenn.edu/R/Rhelp02a/archive/19953.html
## See http://finzi.psych.upenn.edu/R/Rhelp02a/archive/56966.html
zapsmall()
Objects
## Get a search path for objects, a list of attached packages, data ## files, and attached objects (usually data.frames): search() ## List objects in the user environment: ls() objects() ## List objects in an attached package: ls( name = "package:methods" ) ls( name = "package:methods", all.names = TRUE ) ls( name = search()[ 4 ] ) ## Remove an object from the user environment: x <- 1:10 rm( x ) remove( x ) ## Remove all objects from the user environment: rm( list = ls() ) ## Find conflicts in object names: conflicts( detail = TRUE ) ## Remove objects from the user workspace that mask other objects: rm( list = conflicts( detail = TRUE )$.GlobalEnv ) ## Return a list of objects containing "mat" in their names ## (e.g., "as.matrix", "subset.matrix": apropos( what = "mat" ) ## Find the location of a specific object. ## Here, returns "package:base". find( what = "subset.matrix" ) ## returns "package:base" ## Find the locations of objects containing "mat" in their names: find( what = "mat", simple.words = FALSE ) ## Return the value of a named object. get() ## Access exported and internal variables in a name space. ?"::" base::log base::"+" ## Save specific objects to an R data file. x <- rnorm( 10 ) y <- rpois( 10, 2 ) save( x, y, file = "my.RData" ) ## Save all user objects to the file .RData, which will be loaded ## automatically when R is restarted from the working directory. save.image() save.image( file = "~/.RData" ) save( list = ls( all = TRUE ), file = ".RData" ) ## Reload a data set saved with the function save(). ## A file named .RData, located in the directory from which R is ## started, is loaded automatically on startup. load( file = "my.RData" ) load( file = "~/.RData" ) ## Load a data set via a URL connection. con <- url( "http://www.maths.anu.edu.au/~johnm/r/dsets/usingR.RData" ) load( con ) close( con ) ## Attach a data set saved with the function save(). attach( what = "my.RData" ) ## Detach an attached file. detach( name = "file:my.RData" )
Packages
## Use the HTML help to get information about packages. When the browser ## window opens, click on the link for "Packages": help.start() ## Learn about an individual package: library( help = MASS ) help( package = MASS ) packageDescription( pkg = "MASS" ) ## Load a package from the command line: library( package = MASS ) ## Load a package from within a function: require( package = MASS ) ## Detach a package: detach( name = package:MASS ) ## Get a list of loaded packages and attached objects: search() ## Get a list of installed packages: library() ## Get more information about installed packages, returned in a matrix: installed.packages() ## Get information about a specific package: installed.packages()[ "MASS", ] ## Get information about available packages: available.packages() ## Get a character vector containing the names of installed packages: .packages( all.available = TRUE ) ## Get information about data in packages that have been loaded: data() ## Get information about data in installed packages: data( package = "MASS" ) data( package = .packages( all.available = TRUE ) )
fortunes
## Just for fun: library( package = fortunes ) fortune()
To view an R fortune every time you start R, create a file named .Rprofile
in your home directory, with the following contents:
## Show a fortune at the start of each interactive session.
## require() returns FALSE instead of stopping when the package cannot be
## loaded, so fortune() is called only when loading succeeded; a missing
## package then no longer causes an error at startup.
if ( interactive() && require( package = fortunes, quietly = TRUE ) ) {
    print( fortune() )
}
See ?.Rprofile for more information.
Platform, machine, and version characteristics
## Get platform characteristics as a list:
.Platform
## Get machine characteristics as a list; these include the numerical
## characteristics and the sizes of basic variable types:
.Machine
## Get R version information:
R.version ## a simple.list variable
version ## identical to R.version, for S-Plus compatibility
R.version.string ## a character string variable, e.g.
## "R version 2.2.1, 2005-12-20"
R.Version() ## a function that returns the contents of R.version
## as a list
getRversion() ## a function that returns the version of R
## e.g., "2.2.1"
## Get system information:
Sys.info() ## get system and user information from system calls
## Get session information:
sessionInfo() ## returns R version, platform, attached packages
Stats
anova() aov() C() cor.test() glm() lm() t.test()
Correlation, variance, and covariance
## Help for cor(), var(), cov(), and cov2cor() functions:
help( cor )
## Simple examples:
x <- rnorm( n = 1000, mean = 0, sd = 1 )
y <- rnorm( n = 1000, mean = 0, sd = 1 )
cov( x = x, y = y, method = "pearson" )
cor( x = x, y = y, method = "pearson" ) ##also "kendall", "spearman"
## Testing the significance of correlation:
cor.test(
x = x,
y = y,
alternative = "two.sided",
method = "pearson",
conf.level = 0.95 )
## For more than two variables, a matrix or data.frame is required:
z <- rnorm( n = 1000, mean = 0, sd = 1 )
w <- data.frame( x = x, y = y, z = z )
cov( x = w, method = "pearson" )
cor( x = w, method = "pearson" )
## Testing the significance of correlation:
## These must be pairwise.
cor.test(
~ x + y,
data = w,
method = "pearson" )
## Convert a covariance matrix to a correlation matrix:
w.cov <- cov( x = w, method = "pearson" )
w.cor <- cov2cor( w.cov )
density()
## A basic example:
x1 <- runif( 100, 10, 80 )
x2 <- runif( 100, 1, 100 )
d1 <- density( x1, kernel = "gaussian", width = 20 )
plot( d1, type = "l", xlim=c( 0, 100 ), ylim = c( 0.0, 0.1 ),
col = "red", main = "" )
d2 <- density( x2, kernel = "gaussian", width = 20 )
lines( d2, col = "blue" )
Linear Models
Contrasts
## Examples of contrast types: contr.helmer( n = 3 ) contr.poly( n = 3 ) contr.sum( n = 3 ) contr.treatment( n = 3, base = 1 ) contr.SAS( n = 3 ) ## Set default contrast types for unordered and ordered factors: options( contrasts = c( "contr.treatment", "contr.poly" ) )
Model Matrix (Design Matrix)
## A formula and a data set are required to build a model matrix.
## library() stops with an error if the package is unavailable, whereas
## require() would only return FALSE and let the later lines fail.
library( datasets )
## Model log( Volume ) of the trees data on log( Height ) and log( Girth ).
ff <- log( Volume ) ~ log( Height ) + log( Girth )
## mm has one row per observation and one column per model term,
## including the intercept.
mm <- model.matrix( object = ff, data = trees )
Multiple Comparisons
p.adjust() pairwise.t.test() pairwise.prop.test() pairwise.wilcox.test() pairwise.table()
Strings
as.character() bquote() cat() encodeString() format() formatC() gettext() gettextf() paste() print() quote() sprintf() substitute() text() toString()
Subsetting
## Subset based on logical relationships:
x <- c( 4, 7, 12, 19, 6 )
y <- x[ x < 12 ]        ## y == c( 4, 7, 6 )
## Subset based on indexes:
z <- x[ c( 2, 4 ) ]     ## z == c( 7, 19 )
## Negative indexes drop the listed elements:
w <- x[ -c( 2, 4 ) ]    ## w == c( 4, 12, 6 )