Wednesday, Nov 25
Tutorials | Overview | User Guide | FAQ | Contact/Help | News | Data Quality | File Structure | CDS R/D | Sponsorship | More...

 

Merging PSID Data: SAS Example

Method 1

    *---------------------------------------------------------------------*
    |                                                                     |
    | This example program demonstrates a relatively simple method for    |
    | merging PSID data.  It uses data from 3 different years, subset-    |
    | ting criteria, and the compress and tagsort options.                |
    |                                                                     |
    | When working with PSID data, the amount of available system disk    |
    | space and memory is often an important consideration.  The follow-  |
    | ing options can be used to enhance system performance and control   |
    | the amount of disk space and memory used:                           |
    |                                                                     |
    | 1)  LENGTH statements specify the number of bytes used for storing  |
    |     variables and can significantly reduce the size of a data set.  |
    |     The precision of a numeric variable is dependent on its length. |
    |     Length specifications for numeric variables are host-specific.  |
    |     Do not shorten length specifications for numeric variables con- |
    |     taining fractions.                                              |
    |                                                                     |
    | 2)  The COMPRESS= data set or system option can decrease the size   |
    |     of a data set and reduce the number of input/output operations. |
    |                                                                     |
    | 3)  The SORTSIZE= option specifies the maximum amount of memory     |
    |     available to PROC SORT.  Sortsize parameters are host-specific. |
    |                                                                     |
    | 4)  The TAGSORT option can be used with PROC SORT to reduce the     |
    |     amount of temporary disk space used.  When the total length of  |
    |     BY variables is small compared with the record length, temp-    |
    |     orary disk space is reduced significantly, although processing  |
    |     time can be much higher.                                        |
    |                                                                     |
    | 5)  The BUFNO= data set or system option specifies the number of    |
    |     page buffers to use when reading from or writing to a SAS data  |
    |     set.  Increasing the number of available buffers uses more      |
    |     memory while reducing the number of input/output operations.    |
    |                                                                     |
    | 6)  The BUFSIZE= data set or system option specifies the permanent  |
    |     page size for a SAS data set.  Increasing the page size uses    |
    |     more memory while reducing the number of input/output operat-   |
    |     ions.  A buffer size larger than necessary is inefficient.      |
    |                                                                     |
    *---------------------------------------------------------------------;
    
    * Read in cross-year individual file and select variables
      from 1990-1992 needed for analysis.  The three yearly interview
      numbers are renamed to ID90, ID91 and ID92 on output so that they
      can be used as BY variables when the family files are merged in;

    * PSID is the permanent library that receives the merged data set;

    LIBNAME PSID "[FOLDER NAME]";
    DATA IND90_92(COMPRESS=YES RENAME=(ER30642=ID90 ER30689=ID91
    ER30733=ID92));
    INFILE "[PATH]\IND2007ER.txt" LRECL = 3118 ;

    * Column input: each variable is read from fixed column positions.
      ER30805 is read with three implied decimal places;

    INPUT
       ER30001     2 - 5
       ER30002     6 - 8
       ER30642  1528 - 1532
       ER30643  1533 - 1534
       ER30644  1535 - 1536
       ER30645  1537 - 1539
       ER30653  1555
       ER30657  1563 - 1564
       ER30659  1566 - 1571
       ER30689  1647 - 1650
       ER30690  1651 - 1652
       ER30691  1653 - 1654
       ER30692  1655 - 1657
       ER30699  1672
       ER30703  1680 - 1681
       ER30705  1683 - 1688
       ER30707  1690 - 1695
       ER30733  1764 - 1767
       ER30734  1768 - 1769
       ER30735  1770 - 1771
       ER30736  1772 - 1774
       ER30744  1790
       ER30748  1798 - 1799
       ER30750  1801 - 1806
       ER30752  1808 - 1813
       ER30805  1907 - 1913 .3
       ER32000  2060
       ER32022  2113 - 2114
       ER32049  2189
       ;
    LABEL
       ER30001="1968 INTERVIEW NUMBER 68"
       ER30002="PERSON NUMBER         68"
       ER30642="1990 INTERVIEW NUMBER"
       ER30643="SEQUENCE NUMBER       90"
       ER30644="RELATIONSHIP TO HEAD  90"
       ER30645="AGE OF INDIVIDUAL     90"
       ER30653="EMPLOYMENT STAT       90"
       ER30657="COMPLETED EDUCATION   90"
       ER30659="TOT TXBL INCOME       90"
       ER30689="1991 INTERVIEW NUMBER"
       ER30690="SEQUENCE NUMBER       91"
       ER30691="RELATIONSHIP TO HEAD  91"
       ER30692="AGE OF INDIVIDUAL     91"
       ER30699="EMPLOYMENT STAT       91"
       ER30703="COMPLETED EDUCATION   91"
       ER30705="TOT LABOR INCOME      91"
       ER30707="TOT ASSET INCOME      91"
       ER30733="1992 INTERVIEW NUMBER"
       ER30734="SEQUENCE NUMBER       92"
       ER30735="RELATIONSHIP TO HEAD  92"
       ER30736="AGE OF INDIVIDUAL     92"
       ER30744="EMPLOYMENT STAT       92"
       ER30748="COMPLETED EDUCATION   92"
       ER30750="TOT LABOR INCOME      92"
       ER30752="TOT ASSET INCOME      92"
       ER30805="COMBINED IND WEIGHT   92"
       ER32000="SEX OF INDIVIDUAL"
       ER32022="# BIRTHS OF THIS IND"
       ER32049="LAST KNOWN MARITAL STAT"
       ;

    * Recode selected numeric codes (99, 98 and 8) to SAS missing.
      NOTE(review): these are presumably the NA/DK codes for age,
      education, births and marital status - confirm against the codebook;

    IF ER30645=99 THEN ER30645=.;
    IF ER30657=99 THEN ER30657=.;
    IF ER30692=99 THEN ER30692=.;
    IF ER30703=99 THEN ER30703=.;
    IF ER30736=99 THEN ER30736=.;
    IF ER30748=99 THEN ER30748=.;
    IF ER32022=98 THEN ER32022=.;
    IF ER32049=8 THEN ER32049=.;

    * Select those who were ever heads or wives/"wives" between 1990 and
      1992.  A record is kept when, in at least one of the three years,
      the sequence number is 01 with relationship code 10, or the
      sequence number is 02 with relationship code 20 or 22;

    IF (ER30643 EQ 01 AND ER30644 EQ 10) OR
    (ER30643 EQ 02 AND (ER30644 EQ 20 OR ER30644 EQ 22)) OR
    (ER30690 EQ 01 AND ER30691 EQ 10) OR
    (ER30690 EQ 02 AND (ER30691 EQ 20 OR ER30691 EQ 22)) OR
    (ER30734 EQ 01 AND ER30735 EQ 10) OR
    (ER30734 EQ 02 AND (ER30735 EQ 20 OR ER30735 EQ 22));
    RUN ;


    * 1990 family file: read only the analysis variables by column
      position.  The interview number leaves the step as ID90, and the
      two hourly earnings fields carry two implied decimal places;

    data FAM90 (compress=yes
                rename=(V17702=ID90));
       infile "[PATH]\FAM1990.txt" lrecl=2312;

       input
          V17702     4-8
          V17836   283-288
          V18262  1167-1169
          V18564  1633-1635
          V18814  2018
          V18878  2160-2165
          V18887  2206-2212 .2
          V18888  2213-2219 .2
       ;

       label
          V17702 = "1990 INTERVIEW NUMBER"
          V17836 = "WIFE 89 LABOR/WAGE"
          V18262 = "C9-10 OCC-LAST JOB (H-U)"
          V18564 = "E9-10 OCC-LAST JOB (W-U)"
          V18814 = "M32 RACE OF HEAD (1 MEN)"
          V18878 = "TOTAL HEAD LABOR Y 89"
          V18887 = "HEAD 89 AVG HRLY EARNING"
          V18888 = "WIFE 89 AVG HRLY EARNING"
       ;

       * Occupation code 999 and race code 9 become SAS missing;
       if V18262 eq 999 then call missing(V18262);
       if V18564 eq 999 then call missing(V18564);
       if V18814 eq 9   then call missing(V18814);
    run;

    * Put both inputs in ID90 order ahead of the BY-merge.  TAGSORT is
      requested on each sort;

    proc sort data=IND90_92 tagsort;
       by ID90;
    run;

    proc sort data=FAM90 tagsort;
       by ID90;
    run;

    * Attach the 1990 family variables to the selected individuals by
      ID90.  Every individual record is kept, and family records with no
      matching individual are discarded;

    data PSID.FAM_IND (compress=yes);
       merge FAM90
             IND90_92 (in=from_ind);
       by ID90;
       if not from_ind then delete;
    run;


    * 1991 family file: read only the analysis variables by column
      position.  The interview number leaves the step as ID91, and the
      two hourly earnings fields carry two implied decimal places.
      NOTE(review): the interview number is read from columns 2-5 here
      but starts at column 4 in the 1990 and 1992 steps - confirm
      against the 1991 codebook;

    data FAM91 (compress=yes
                rename=(V19002=ID91));
       infile "[PATH]\FAM1991.txt" lrecl=2314;

       input
          V19002     2-5
          V19136   281-286
          V19562  1165-1167
          V19864  1631-1633
          V20114  2016
          V20178  2158-2163
          V20187  2204-2210 .2
          V20188  2211-2217 .2
       ;

       label
          V19002 = "1991 INTERVIEW NUMBER"
          V19136 = "WIFE 90 LABOR/WAGE"
          V19562 = "C9-10 OCC-LAST JOB (H-U)"
          V19864 = "E9-10 OCC-LAST JOB (W-U)"
          V20114 = "L32 RACE OF HEAD (1 MEN)"
          V20178 = "TOTAL HEAD LABOR Y 90"
          V20187 = "HEAD 90 AVG HRLY EARNING"
          V20188 = "WIFE 90 AVG HRLY EARNING"
       ;

       * Occupation code 999 and race code 9 become SAS missing;
       if V19562 eq 999 then call missing(V19562);
       if V19864 eq 999 then call missing(V19864);
       if V20114 eq 9   then call missing(V20114);
    run;

    * Put the merged file and the 1991 family file in ID91 order ahead
      of the BY-merge.  PSID.FAM_IND is sorted in place;

    proc sort data=PSID.FAM_IND tagsort;
       by ID91;
    run;

    proc sort data=FAM91 tagsort;
       by ID91;
    run;

    * Add the 1991 family variables to PSID.FAM_IND by ID91, replacing
      the data set with the result.  Every existing record is kept, and
      family records with no match are discarded;

    data PSID.FAM_IND (compress=yes);
       merge FAM91
             PSID.FAM_IND (in=from_merged);
       by ID91;
       if not from_merged then delete;
    run;

    * 1992 family file: read only the analysis variables by column
      position.  The interview number leaves the step as ID92, and the
      two hourly earnings fields carry two implied decimal places;

    data FAM92 (compress=yes
                rename=(V20302=ID92));
       infile "[PATH]\FAM1992.txt" lrecl=2294;

       input
          V20302     4-7
          V20436   283-288
          V20862  1174-1176
          V21164  1640-1642
          V21420  2031
          V21484  2137-2142
          V21493  2183-2189 .2
          V21494  2190-2196 .2
       ;

       label
          V20302 = "1992 INTERVIEW NUMBER"
          V20436 = "WIFE 91 LABOR/WAGE"
          V20862 = "C9-10 OCC-LAST JOB (H-U)"
          V21164 = "E9-10 OCC-LAST JOB (W-U)"
          V21420 = "M32 RACE OF HEAD (1 MEN)"
          V21484 = "TOTAL HEAD LABOR Y 91"
          V21493 = "HEAD 91 AVG HRLY EARNING"
          V21494 = "WIFE 91 AVG HRLY EARNING"
       ;

       * Occupation code 999 and race code 9 become SAS missing;
       if V20862 eq 999 then call missing(V20862);
       if V21164 eq 999 then call missing(V21164);
       if V21420 eq 9   then call missing(V21420);
    run;


    * Put the merged file and the 1992 family file in ID92 order ahead
      of the BY-merge.  PSID.FAM_IND is sorted in place;

    proc sort data=PSID.FAM_IND tagsort;
       by ID92;
    run;

    proc sort data=FAM92 tagsort;
       by ID92;
    run;


    * Add the 1992 family variables to PSID.FAM_IND by ID92, replacing
      the data set with the result.  Every existing record is kept, and
      family records with no match are discarded;

    data PSID.FAM_IND (compress=yes);
       merge FAM92
             PSID.FAM_IND (in=from_merged);
       by ID92;
       if not from_merged then delete;
    run;

 
 



Institute for Social Research | University of Michigan | Privacy | Conditions of Use