Data Import

Below is a comparison of the commands used to read data into R, SAS, SPSS and Stata. The variables gender and workshop are categorical factors and q1 to q4, pretest and posttest are considered continuous and normally distributed.

The practice data set is shown here. The programs and the data they use are also available for download here. Examples that are missing for Stata reflect differences between the two books. We will get around to those when we write the second edition of R for Stata Users.

Reading a Comma-Delimited Text File

R

# Header row does not name the id column.
mydata <- read.csv(file = "mydata.csv")

# Header row names the id column; use it for the row names.
mydata <- read.csv(
  file      = "mydataID.csv",
  row.names = "id"
)

SAS

* Import the comma-delimited file into myLib.mydata;
PROC IMPORT DATAFILE="mydataID.csv"
            OUT=myLib.mydata
            DBMS=CSV
            REPLACE;
  GETNAMES=YES;  /* take variable names from the file */
  DATAROW=2;     /* data values begin on line 2       */
RUN;

* Display the imported data;
PROC PRINT;
RUN;

SPSS

* Read the comma-delimited file, starting at line 2 of the file.
GET DATA
  /TYPE=TXT
  /FILE='mydataID.csv'
  /DELCASE=LINE
  /DELIMITERS=","
  /ARRANGEMENT=DELIMITED
  /FIRSTCASE=2
  /IMPORTCASE=ALL
  /VARIABLES=
    id       F1.0
    workshop F1.0
    gender   A1.0
    q1       F1.0
    q2       F1.0
    q3       F1.0
    q4       F1.0.
LIST.
SAVE OUTFILE='mydata.sav'.

 Stata


* Drop any data in memory, then read the comma-delimited file.
* The insheet command is kept on one line because Stata ends a
* command at the line break unless the line ends with ///.
clear
insheet using "mydata.csv", comma names
list

 

Reading a Tab-delimited Text File

R

# Tab-delimited file whose header does not name the id column.
mydata <- read.delim(file = "mydata.tab")
mydata

# Tab-delimited file whose header names the id column.
mydata <- read.delim(file = "mydataID.tab", row.names = "id")
mydata
 

SAS

* Import the tab-delimited file into myLib.mydata;
PROC IMPORT DATAFILE="mydataID.tab"
            OUT=myLib.mydata
            DBMS=TAB
            REPLACE;
  GETNAMES=YES;  /* take variable names from the file */
  DATAROW=2;     /* data values begin on line 2       */
RUN;

* Display the imported data;
PROC PRINT;
RUN;
 

SPSS

* Read the tab-delimited file, starting at line 2 of the file.
GET DATA
  /TYPE=TXT
  /FILE="mydataID.tab"
  /DELCASE=LINE
  /DELIMITERS="\t"
  /ARRANGEMENT=DELIMITED
  /FIRSTCASE=2
  /IMPORTCASE=ALL
  /VARIABLES=
    id       F1.0
    workshop F1.0
    gender   A1.0
    q1       F1.0
    q2       F1.0
    q3       F1.0
    q4       F1.0.
LIST.
EXECUTE.
DATASET NAME DataSet1 WINDOW=FRONT.
 

 Stata

 

* Drop any data in memory, then read the tab-delimited file.
* The insheet command is kept on one line because Stata ends a
* command at the line break unless the line ends with ///.
clear
insheet using "mydataID.tab", tab names
list

Reading a comma-delimited text file from a web site

R


# Web address of the comma-delimited file.
myURL <- "http://sites.google.com/site/r4statistics/mydata.csv"

# read.csv() is given the address in place of a local file name.
mydata <- read.csv(file = myURL)
mydata

SAS


* Point the fileref myURL at the web address of the CSV file;
FILENAME myURL URL
  "http://sites.google.com/site/r4statistics/mydataID.csv";

* Import through the fileref into myLib.mydata;
PROC IMPORT DATAFILE=myURL
            OUT=myLib.mydata
            DBMS=CSV
            REPLACE;
  GETNAMES=YES;
  DATAROW=2;
RUN;

* Display the imported data;
PROC PRINT;
RUN;

SPSS

SPSS cannot read text files from a URL directly. It can read SPSS, SAS, Excel, and Stata files from a URL through the SPSSINC GETURI DATA extension.

 

Reading Data Within a Program

R


# Read comma-delimited data typed directly after the command.
# This works interactively; see the book for an alternative
# approach for sourced files.
# The header below has six names while each data line has seven
# fields: the id column is not named.

mydata <- read.csv( stdin() )
workshop,gender,q1,q2,q3,q4
1,1,f,1,1,5,1
2,2,f,2,1,4,1
3,1,f,2,2,4,3
4,2,NA,3,1,NA,3
5,1,m,4,5,2,4
6,2,m,5,4,5,5
7,1,m,5,3,4,4
8,2,m,4,5,5,5

# The blank line above ends the input.

SAS


* Folder that holds the myLib library;
LIBNAME myLib 'C:\myRfolder';

* Read comma-delimited in-stream data and skip the header line (firstobs=2);
DATA myLib.mydata;
INFILE DATALINES DELIMITER = ','
MISSOVER DSD firstobs=2 ;
INPUT id workshop gender $ q1 q2 q3 q4;
DATALINES;
id,workshop,gender,q1,q2,q3,q4
1,1,f,1,1,5,1
2,2,f,2,1,4,1
3,1,f,2,2,4,3
4,2, ,3,1, ,3
5,1,m,4,5,2,4
6,2,m,5,4,5,5
7,1,m,5,3,4,4
8,2,m,4,5,5,5
;
PROC PRINT; RUN;

SPSS


* Fixed-column read: values sit in columns 1, 3, 5 and so on, commas between.
DATA LIST / id 1 workshop 3
gender 5 (A)
q1 7  q2 9  q3 11  q4 13.
BEGIN DATA.
1,1,f,1,1,5,1
2,2,f,2,1,4,1
3,1,f,2,2,4,3
4,2, ,3,1, ,3
5,1,m,4,5,2,4
6,2,m,5,4,5,5
7,1,m,5,3,4,4
8,2,m,4,5,5,5
END DATA.
LIST.
SAVE OUTFILE='C:\myRfolder\mydata.sav'.

Stata


* Type the data directly into the program.
* gender is declared as a one-character string (str1); the other
* variables are numeric. The data lines run until "end".
* NOTE(review): gender is str1, so the "." in row 4 is presumably
* stored as the text "." rather than as a missing value - confirm.
clear all
input id workshop str1 gender q1-q4, automatic
1 1 f 1 1 5 1
2 2 f 2 1 4 1
3 1 f 2 2 4 3
4 2 . 3 1 . 3
5 1 m 4 5 2 4
6 2 m 5 4 5 5
7 1 m 5 3 4 4
8 2 m 4 5 5 5
end
list

Reading Multiple Observations Per Line

R


# Read two observations per line with scan().
# This works interactively; see the book for an alternative
# approach for sourced files.
# The `what` list names each field and gives an example value
# of its type: 0 for numeric, "" for character.

mylist <- scan( stdin(),
what = list(
id = 0,
workshop = 0,
gender   = "",
q1 = 0, q2 = 0,
q3 = 0, q4 = 0))
1 1 f 1 1 5 1    2 2 f 2 1 4 1
3 1 f 2 2 4 3    4 2 NA 3 1 NA 3
5 1 m 4 5 2 4    6 2 m 5 4 5 5
7 1 m 5 3 4 4    8 2 m 4 5 5 5

# The blank line above ends the input.
mylist

# Convert the list returned by scan() into a data frame.
mydata <- data.frame(mylist)
mydata

SAS


* Each data line below holds two observations;
* The trailing @@ on INPUT keeps reading from the same line;
DATA mydata;
INPUT id workshop gender $ q1-q4 @@;
DATALINES;
1 1 f 1 1 5 1    2 2 f 2 1 4 1
3 1 f 2 2 4 3    4 2 . 3 1 . 3
5 1 m 4 5 2 4    6 2 m 5 4 5 5
7 1 m 5 3 4 4    8 2 m 4 5 5 5
;
PROC PRINT; RUN;

SPSS


* Free-format read: each data line below holds two cases.
DATA LIST FREE/ id (f1.0)  workshop (f1.0) gender (A)
q1 (f1.0)  q2 (f1.0)  q3 (f1.0) q4 (f1.0).

BEGIN DATA.
1,1,f,1,1,5,1,    2,2,f,2,1,4,1
3,1,f,2,2,4,3,    4,2, ,3,1, ,3
5,1,m,4,5,2,4,    6,2,m,5,4,5,5
7,1,m,5,3,4,4,    8,2,m,4,5,5,5
END DATA.

LIST.

 

Reading Fixed-Width Text Files, One Record per Case

R


myfile <- "mydataFWF.txt"

# Six variables are kept; no name is supplied for workshop.
myVarNames <- c("id", "gender", "q1", "q2", "q3", "q4")

# One width per field. The negative width (-1) skips a one-character
# field, which is how the workshop column is left out.
myVarWidths <- c(2, -1, 1, 1, 1, 1, 1)

# `widths` is spelled out in full; the abbreviated `width` only worked
# through partial argument matching.
mydata <- read.fwf(
  file        = myfile,
  widths      = myVarWidths,
  col.names   = myVarNames,
  row.names   = "id",
  na.strings  = "",
  fill        = TRUE,
  strip.white = TRUE
)
mydata

SAS

* Read fixed-width records by column position;
DATA myLib.mydata;
  INFILE 'mydataFWF.txt' MISSOVER;
  INPUT id       1-2
        workshop 3
        gender   $ 4
        q1       5
        q2       6
        q3       7
        q4       8;
RUN;


SPSS

* Read fixed-width records by column position, one record per case.
DATA LIST FILE='mydataFWF.txt' RECORDS=1
  /1 id       1-2
     workshop 3
     gender   4 (A)
     q1       5
     q2       6
     q3       7
     q4       8.
LIST.

Stata

* Read fixed-width records by column position.
* Every continued line ends with /// so that Stata treats the whole
* specification as one command. gender holds letters, so it is
* declared with the str type.
infix id 1-2 workshop 3 str gender 4 ///
      q1 5 q2 6 q3 7 q4 8            ///
      using mydataFWF.txt, clear
list

Reading Excel Files

R

# One-time setup: install the package, load it, then call
# xls.getshlib().
# NOTE(review): xls.getshlib() presumably fetches the package's
# shared library - confirm. Also confirm that xlsReadWrite is
# still obtainable for your version of R.
install.packages("xlsReadWrite")
library("xlsReadWrite")
xls.getshlib()

# In every session: load the package and read the workbook.
library("xlsReadWrite")
mydata <- read.xls("mydata.xls")
mydata

SAS

* Import worksheet Sheet1 of the Excel workbook into mydata;
PROC IMPORT DATAFILE = "mydata.xls"
            OUT      = mydata
            DBMS     = EXCELCS
            REPLACE;
  RANGE    = "Sheet1$";
  SCANTEXT = YES;
  USEDATE  = YES;
  SCANTIME = YES;
RUN;

SPSS

* Read the full cell range of worksheet Sheet1 with READNAMES switched on.
GET DATA
  /TYPE=XLS
  /FILE='mydata.xls'
  /SHEET=NAME 'Sheet1'
  /CELLRANGE=FULL
  /READNAMES=ON
  /ASSUMEDSTRWIDTH=32767.
EXECUTE.

Stata

This feature has been added to the latest version of Stata, but I haven’t gotten around to learning it yet.

Reading SAS Files

R

# Load the sas7bdat package and read the SAS data set.
# The leading ">" console prompt was removed (it is a syntax error in
# a script), and the result is now stored in mydata and printed,
# matching the other R examples on this page.
library("sas7bdat")
mydata <- read.sas7bdat("mydata.sas7bdat")
mydata

Reading SPSS Files

# Load the foreign package and read the SPSS file as a data frame.
# The leading ">" console prompt was removed; it is a syntax error
# in a script.
library("foreign")
mydata <- read.spss(
  "mydata.sav",
  use.value.labels = TRUE,
  to.data.frame    = TRUE
)
mydata

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s