# *****************************************************************************************
# January 2019
#
# ** LINKED NCHS-CMS MEDICAID (MAX) FEASIBILITY STUDY ASCII FILE **
#
# The following R code can be used to read the fixed-width format LINKED NCHS-CMS
# MEDICAID (MAX) FEASIBILITY STUDY ASCII FILE from a stored location into an R data frame.
# Basic frequencies are also produced.
# A small section of code must be uncommented if you are reading an NHANES file.
#
# *****************************************************************************************
#
# NOTE: To download and save the public-use files to your hard drive, follow these steps:
#
# (1) Designate a folder on your hard drive to download the public-use file. In this example,
#     the data will be saved to "C:\PUBLIC USE DATA".
#
# (2) The public-use files can be downloaded from this website:
#     ftp://ftp.cdc.gov/pub/Health_Statistics/NCHS/datalinkage/feasibility_study_data/CMS/medicaid/
#
#     Right click on the desired survey link and select "Save target as...". A "Save As"
#     screen will appear where you will need to select and input a location where to
#     save the data file on your hard drive.
#
#     Also note that the "Save as type:" box should read "DAT File (*.dat)". This will ensure
#     that the data file is saved to your hard drive in the correct format.
#
#     In this example, the data file is saved in the folder, "C:\PUBLIC USE DATA", and the
#     data file is saved as "MEDICAID_FEASIBILITY_NHIS_2013.dat".
#
# *****************************************************************************************
#
# R NOTES:
# (1) For convenience, the user can place the name of the public-use NCHS-CMS MEDICAID (MAX)
#     FEASIBILITY file they are reading in and creating as an R data frame in just two places:
#     (1) the line beginning with srvyin; and (2) the line beginning with srvyout. The
#     resultant R data frame will have the shorthand name assigned in the srvyout line.
#
# (2) Variable labels are not provided.
#     Please see the public-use codebook for this information.
#
# (3) Variable value formats are not attached to the variables.
#     NOTE(review): the original template stated that value formats are included in
#     comment blocks in the Variable Frequencies section, but no such comment blocks
#     are present in this script -- consult the codebook for value meanings.
#
# *****************************************************************************************

#install.packages("readr")    # if package is not installed already, then uncomment and run this line
#install.packages("dplyr")    # if package is not installed already, then uncomment and run this line

library(readr)
library(dplyr)  # only used by the optional NHANES section below (select())

# Setup ----

# The location where the .DAT file is saved. Edit this to point at your own folder.
# The script no longer calls setwd(), so the session's working directory is left alone.
data_dir <- "\\\\cdc/csp_project/CIPSEA_OAE_LINK_MEDICAID/MEDICAID_1999_2014/Medicaid_Feasibility_File/Data"

# NOTE: the original template ran rm(list = ls()) here. That wiped the whole workspace,
# including data frames created by earlier runs of this script for other surveys, so it
# has been removed. Only the temporary objects created below are cleaned up at the end.

# Place the survey name here (substitute your survey for NHIS_2013):
srvyin  <- "MEDICAID_FEASIBILITY_NHIS_2013.dat"  # full .DAT name here
srvyout <- "NHIS_2013"                           # shorthand dataset name here

# Example syntax:
#srvyin  <- "MEDICAID_FEASIBILITY_NHIS_1998.DAT"
#srvyout <- "NHIS_1998"

srvy_path <- file.path(data_dir, srvyin)
if (!file.exists(srvy_path)) {
  stop("Data file not found: ", srvy_path, call. = FALSE)
}

# Column layout ----

# The record layout is:
#   columns  1-14 : publicid            (character)
#   column   15   : cms_medicaid_match  (integer)
#   columns 16-95 : on_<file>_<year>    (integer), one single-character column each,
#                   ordered by file code (ps, ip, lt, ot, rx) and then by year 1999-2014.
# Generating the names and positions avoids hand-typing 82 entries.
# (File codes presumably correspond to the MAX file types -- confirm against the codebook.)
file_codes <- c("ps", "ip", "lt", "ot", "rx")
file_years <- 1999:2014
flag_vars  <- paste0("on_", rep(file_codes, each = length(file_years)), "_", file_years)

single_vars <- c("cms_medicaid_match", flag_vars)  # every one-character column, in file order
single_pos  <- 14L + seq_along(single_vars)        # columns 15, 16, ..., 95

col_layout <- fwf_positions(
  start     = c(1L, single_pos),
  end       = c(14L, single_pos),
  col_names = c("publicid", single_vars)
)

# Read in the fixed-width format ASCII file ----

# publicid is kept as character; every other column is read as integer.
# A "." in the file is treated as missing.
dsn <- read_fwf(
  file          = srvy_path,
  col_positions = col_layout,
  col_types     = cols(.default = col_integer(), publicid = col_character()),
  na            = "."
)

#********* This section is for NHANES only ************
#dsn$seqn <- substr(dsn$publicid,1,5)
#dsn$resnum <- substr(dsn$publicid,1,6)
#dsn$patnum <- substr(dsn$publicid,1,6)
#dsn <- select(dsn,-publicid)
#******************************************************

# Structure and contents of data ----
str(dsn)

# Variable frequencies ----

# One frequency table per variable, covering every year that was read in (1999-2014).
# print() is explicit so the tables also appear when the script is run via source();
# dnn labels each table with its variable name; useNA = "ifany" reports missing values
# instead of silently dropping them.
for (freq_var in single_vars) {
  print(table(dsn[[freq_var]], dnn = freq_var, useNA = "ifany"))
}

# Re-name the dataset, DSN, to the short survey name then remove the temporary objects ----
assign(srvyout, dsn)
rm(
  dsn, srvyin, srvyout, data_dir, srvy_path,
  file_codes, file_years, flag_vars, single_vars, single_pos,
  col_layout, freq_var
)