Data Import

Below is a comparison of the commands used to read data into R, SAS, SPSS and Stata. The variables gender and workshop are categorical factors and q1 to q4, pretest and posttest are considered continuous and normally distributed.

The practice data set is shown here. The programs and the data they use are also available for download here. Examples that are missing for Stata reflect differences between the two books. We will get around to those when we write the second edition of R for Stata Users.

Reading a Comma-Delimited Text File

R

# Case 1: the id column has no name in the header row.
# (Presumably the header then has one fewer name than the data has
# fields, in which case read.csv() uses the first column as row names.)
mydata <- read.csv(file = "mydata.csv")

# Case 2: the header row names the id column, so ask for that
# column to become the row names.
mydata <- read.csv(
  file      = "mydataID.csv",
  row.names = "id"
)

SAS

/* Import the comma-delimited file into myLib.mydata, */
/* replacing the data set if it already exists.       */
PROC IMPORT DATAFILE="mydataID.csv"
            OUT=myLib.mydata
            DBMS=CSV
            REPLACE;
  GETNAMES=YES;   /* take variable names from the file */
  DATAROW=2;      /* data values start on row 2        */
RUN;

/* Print the imported data set. */
PROC PRINT;
RUN;

SPSS

* Read the comma-delimited file, starting at line 2 to skip the header.
GET DATA
  /TYPE=TXT
  /FILE='mydataID.csv'
  /DELCASE=LINE
  /DELIMITERS=","
  /ARRANGEMENT=DELIMITED
  /FIRSTCASE=2
  /IMPORTCASE=ALL
  /VARIABLES=
    id F1.0
    workshop F1.0
    gender A1.0
    q1 F1.0
    q2 F1.0
    q3 F1.0
    q4 F1.0 .
* List the cases, then save them as an SPSS data file.
LIST.
SAVE OUTFILE='mydata.sav'.

 Stata


* Read a comma-delimited file whose first line holds the variable names.
* The insheet command is kept on one line: Stata ends a command at the
* line break unless the line finishes with the /// continuation marker.
* NOTE(review): the SAS and SPSS examples read mydataID.csv; confirm
* that mydata.csv is the intended file here.
clear
insheet using "mydata.csv", comma names
list

 

Reading a Tab-delimited Text File

R

# Case 1: the id column has no name in the header row.
mydata <- read.delim(file = "mydata.tab")
mydata

# Case 2: the header row names the id column, so use it
# for the row names.
mydata <- read.delim(
  file      = "mydataID.tab",
  row.names = "id"
)
mydata

SAS

/* Import the tab-delimited file into myLib.mydata, */
/* replacing the data set if it already exists.     */
PROC IMPORT DATAFILE="mydataID.tab"
            OUT=myLib.mydata
            DBMS=TAB
            REPLACE;
  GETNAMES=YES;   /* take variable names from the file */
  DATAROW=2;      /* data values start on row 2        */
RUN;

/* Print the imported data set. */
PROC PRINT;
RUN;

SPSS

* Read the tab-delimited file, starting at line 2 to skip the header.
* The tab delimiter is written as backslash-t inside the quotes.
GET DATA
  /TYPE=TXT
  /FILE="mydataID.tab"
  /DELCASE=LINE
  /DELIMITERS="\t"
  /ARRANGEMENT=DELIMITED
  /FIRSTCASE=2
  /IMPORTCASE=ALL
  /VARIABLES = id F1.0  workshop F1.0 gender A1.0
    q1 F1.0  q2 F1.0  q3 F1.0  q4 F1.0 .
* List the cases and name the resulting active dataset.
LIST.
EXECUTE.
DATASET NAME DataSet1 WINDOW=FRONT.

 Stata

 

* Read a tab-delimited file whose first line holds the variable names.
* The insheet command is kept on one line: Stata ends a command at the
* line break unless the line finishes with the /// continuation marker.
clear
insheet using "mydataID.tab", tab names
list

Reading a comma-delimited text file from a web site

R


# Store the web address of the comma-delimited file
# (replace the placeholder with a real URL).
myURL <- "http:// (enter URL here)"

# read.csv() accepts the address in place of a local file name.
mydata <- read.csv(file = myURL)
mydata

SAS


/* Give the web address a file reference that PROC IMPORT can use */
/* (replace the placeholder with a real URL).                     */
FILENAME myURL URL "http://( enter URL here)";

/* Import the comma-delimited file behind that reference. */
PROC IMPORT DATAFILE=myURL
            OUT=myLib.mydata
            DBMS=CSV
            REPLACE;
  GETNAMES=YES;   /* take variable names from the file */
  DATAROW=2;      /* data values start on row 2        */
RUN;

/* Print the imported data set. */
PROC PRINT;
RUN;

SPSS

SPSS cannot read text files from a URL directly. It can read SPSS, SAS, Excel, and Stata files from a URL through the SPSSINC GETURI DATA extension.

 

Reading Data Within a Program

R


# Read comma-separated data typed directly into the program.
# This works interactively
# See book for alternative
# approach for sourced files.
#
# The header line below names six variables while each data line has
# seven fields, so read.csv() uses the first field (the id) as the
# row names. Nothing may come between the read.csv() call and the
# data lines.

mydata <- read.csv( stdin() )
workshop,gender,q1,q2,q3,q4
1,1,f,1,1,5,1
2,2,f,2,1,4,1
3,1,f,2,2,4,3
4,2,NA,3,1,NA,3
5,1,m,4,5,2,4
6,2,m,5,4,5,5
7,1,m,5,3,4,4
8,2,m,4,5,5,5

# Blank line above ends input.

SAS


/* Point the myLib library reference at the working folder. */
LIBNAME myLib 'C:\myRfolder';

/* Read comma-delimited data stored inside the program itself.   */
/* firstobs=2 skips the header line that opens the data.         */
/* DSD treats two delimiters in a row as a missing value, and    */
/* MISSOVER keeps INPUT from moving to the next line on a short  */
/* record.                                                       */
DATA myLib.mydata;
  INFILE DATALINES DELIMITER = ','
         MISSOVER DSD firstobs=2 ;
  INPUT id workshop gender $ q1 q2 q3 q4;
DATALINES;
id,workshop,gender,q1,q2,q3,q4
1,1,f,1,1,5,1
2,2,f,2,1,4,1
3,1,f,2,2,4,3
4,2, ,3,1, ,3
5,1,m,4,5,2,4
6,2,m,5,4,5,5
7,1,m,5,3,4,4
8,2,m,4,5,5,5
;
/* The lone semicolon above marks the end of the data lines. */
PROC PRINT; RUN;

SPSS


* Read data embedded in the program using fixed column positions.
* Each value is one character wide and sits in an odd-numbered column.
* The commas fall in the even-numbered columns and are never read.
DATA LIST / id 1 workshop 3
  gender 5 (A)
  q1 7  q2 9  q3 11  q4 13.
BEGIN DATA.
1,1,f,1,1,5,1
2,2,f,2,1,4,1
3,1,f,2,2,4,3
4,2, ,3,1, ,3
5,1,m,4,5,2,4
6,2,m,5,4,5,5
7,1,m,5,3,4,4
8,2,m,4,5,5,5
END DATA.
* List the cases, then save them in the working folder.
LIST.
SAVE OUTFILE='C:\myRfolder\mydata.sav'.

Stata


* Type the data directly into the program with the input command.
* str1 declares gender as a one-character string variable.
* The data lines run from the input command down to end.
clear all
input id workshop str1 gender q1-q4, automatic
1 1 f 1 1 5 1
2 2 f 2 1 4 1
3 1 f 2 2 4 3
4 2 . 3 1 . 3
5 1 m 4 5 2 4
6 2 m 5 4 5 5
7 1 m 5 3 4 4
8 2 m 4 5 5 5
end
* Display the data that was just entered.
list

Reading Multiple Observations Per Line

R


# Read data typed into the program, several observations per line.
# This works interactively
# See book for alternative
# approach for sourced files.
#
# The what= list names each field and gives its type by example:
# 0 marks a numeric field and "" a character field. scan() cycles
# through the list, so one line can hold more than one observation.
# Nothing may come between the scan() call and the data lines.

mylist <- scan( stdin(),
what = list(
id = 0,
workshop = 0,
gender   = "",
q1 = 0, q2 = 0,
q3 = 0, q4 = 0))
1 1 f 1 1 5 1    2 2 f 2 1 4 1
3 1 f 2 2 4 3    4 2 NA 3 1 NA 3
5 1 m 4 5 2 4    6 2 m 5 4 5 5
7 1 m 5 3 4 4    8 2 m 4 5 5 5

# Blank line above ends input.
mylist

# scan() returned a list; convert it to a data frame.
mydata <- data.frame(mylist)
mydata

SAS


/* Read several observations from each data line. */
DATA mydata;
  /* The trailing @@ holds the current line so that the next */
  /* pass through INPUT keeps reading from it.               */
  INPUT id workshop gender $ q1-q4 @@;
DATALINES;
1 1 f 1 1 5 1    2 2 f 2 1 4 1
3 1 f 2 2 4 3    4 2 . 3 1 . 3
5 1 m 4 5 2 4    6 2 m 5 4 5 5
7 1 m 5 3 4 4    8 2 m 4 5 5 5
;

/* Print the data set just created. */
PROC PRINT;
RUN;

SPSS


* Read several cases per line in freefield format.
DATA LIST FREE
  / id (f1.0)
    workshop (f1.0)
    gender (A)
    q1 (f1.0)
    q2 (f1.0)
    q3 (f1.0)
    q4 (f1.0).

BEGIN DATA.
1,1,f,1,1,5,1,    2,2,f,2,1,4,1
3,1,f,2,2,4,3,    4,2, ,3,1, ,3
5,1,m,4,5,2,4,    6,2,m,5,4,5,5
7,1,m,5,3,4,4,    8,2,m,4,5,5,5
END DATA.

* Show the cases that were read.
LIST.

 

Reading Fixed-Width Text Files, One Record per Case

R


# Read a fixed-width text file with one record per case.
myfile <- "mydataFWF.txt"

# Names for the six columns that are kept; one field is skipped by
# the negative width below, so it gets no name.
myVarNames <- c("id", "gender", "q1", "q2", "q3", "q4")

# Field widths in file order. A negative width tells read.fwf() to
# skip that many characters: here the single column after id.
myVarWidths <- c(2, -1, 1, 1, 1, 1, 1)

mydata <- read.fwf(
  file        = myfile,
  widths      = myVarWidths,  # spelled in full; "width" only worked
                              # through partial argument matching
  col.names   = myVarNames,
  row.names   = "id",
  na.strings  = "",
  fill        = TRUE,
  strip.white = TRUE
)
mydata

SAS

/* Read a fixed-width text file, one record per case. */
DATA myLib.mydata;
  INFILE 'mydataFWF.txt' MISSOVER;
  /* Each variable is followed by the column(s) it occupies. */
  INPUT id       1-2
        workshop 3
        gender $ 4
        q1       5
        q2       6
        q3       7
        q4       8;
RUN;


SPSS

* Read a fixed-width text file with one record per case.
* Each variable is followed by the column or columns it occupies.
DATA LIST FILE='mydataFWF.txt' RECORDS=1
  /1 id 1-2
     workshop 3
     gender 4 (A)
     q1 5
     q2 6
     q3 7
     q4 8.
LIST.

Stata

* Read a fixed-width text file, one record per case.
* Every line of the command except the last ends with /// so that
* Stata treats the lines as a single command.
* gender holds text, so it carries the str prefix.
infix id 1-2 workshop 3   ///
      str gender 4        ///
      q1 5 q2 6 q3 7 q4 8 ///
      using mydataFWF.txt, clear
list

Reading Excel Files

R

# One-time setup: install the package, load it, and fetch its
# shared library.
# NOTE(review): confirm that xlsReadWrite is still available for
# your version of R before relying on these steps.
install.packages("xlsReadWrite")
library(xlsReadWrite)
xls.getshlib()

# Every session: load the package, then read the workbook.
library(xlsReadWrite)
mydata <- read.xls("mydata.xls")
mydata

SAS

/* Import the Sheet1 worksheet of the Excel file into mydata, */
/* replacing the data set if it already exists.               */
PROC IMPORT DATAFILE = "mydata.xls"
            OUT      = mydata
            DBMS     = EXCELCS
            REPLACE;
  RANGE    = "Sheet1$";
  SCANTEXT = YES;
  USEDATE  = YES;
  SCANTIME = YES;
RUN;

SPSS

* Read the whole Sheet1 worksheet, taking variable names from the sheet.
GET DATA
  /TYPE=XLS
  /FILE='mydata.xls'
  /SHEET=name 'Sheet1'
  /CELLRANGE=full
  /READNAMES=on
  /ASSUMEDSTRWIDTH=32767.
EXECUTE.

Stata

This feature has been added to the latest version of Stata, but I haven’t gotten around to learning it yet.

Reading SAS Files

R

# Load the sas7bdat package, read the SAS data set into a data
# frame, and print it.
library("sas7bdat")
mydata <- read.sas7bdat("mydata.sas7bdat")
mydata

Reading SPSS Files

# Load the foreign package and read the SPSS file.
library("foreign")
mydata <- read.spss(
  "mydata.sav",
  use.value.labels = TRUE,  # turn labelled values into factors
  to.data.frame    = TRUE   # return a data frame rather than a list
)
mydata

4 Responses to Data Import

  1. I’m starting to relearn SAS now, and I find this helpful. Thanks for this Sir Bob!

  2. Kevin says:

    Hi, what is the R command to read a jmp table, please ?

    Thank you in advance,

    Kevin

    • muenchen.bob@gmail.com says:

      Hi Kevin,

      I don’t know of anything that reads JMP tables directly, but if you save it as a SAS data set, or even a comma separated value file, then you can use the functions above to read it.

      Cheers,
      Bob

Leave a Reply