# JPS DSRIP Report V2.0
# ---- Session setup ---------------------------------------------------------
# Maximum JVM heap: 14336 MB (14 GB).
# NOTE(review): presumably this has to stay ahead of library(XLConnect) so the
# Java VM started by the package picks the setting up -- confirm before
# reordering.
options(java.parameters = "-Xmx14336m")

library(XLConnect)
library(sqldf)
library(tcltk)

# ---- Run parameters --------------------------------------------------------
# Date bounds stored as "YYYY-MM-DD" strings; they are not used in this part
# of the script.
startDate <- "2015-10-01"
endDate <- "2016-09-30"

# ---- Load raw data ---------------------------------------------------------
# First worksheet of the raw-data workbook, reading from row 2 onward.
df <- readWorksheetFromFile(file = "JPS_Raw_Data.xlsx", sheet = 1, startRow = 2)

# Disabled sampling workflow: draw 30000 random rows from `df`, write them to
# a CSV, and read that CSV back as `df3`.
# NOTE(review): with these lines disabled nothing in the visible script
# creates `df3`, yet the formatting section below operates on `df3`.
#sampleVector <- sample(1:nrow(df), 30000)
#df2 <- df[sampleVector,]
#write.csv(df2, file="Sample of JPS_Raw_Data (30000).csv", na="")
#df3 <- read.csv("Sample of JPS_Raw_Data (30000).csv")
### Formatting ###################################
# Normalises the working data frame `df3` in place:
#   1. replaces every NA cell with an empty string,
#   2. strips hyphens from the participant identifier,
#   3. gives selected columns short names, addressed by position.

# Fail fast with an actionable message: the only statement in this script that
# creates `df3` (the read.csv() of the sampled CSV) is commented out, so on a
# fresh session the code below would otherwise stop with a bare
# "object 'df3' not found".
if (!exists("df3")) {
  stop(
    "`df3` is not defined. The formatting section expects a data frame named ",
    "`df3`; create it (for example by re-enabling the read.csv() step above) ",
    "before running this section.",
    call. = FALSE
  )
}

# NOTE(review): assigning "" may turn non-character columns that contain NA
# into character columns -- confirm this is intended.
df3[is.na(df3)] <- ""

# Remove every "-" from the identifier values. This must run before the
# renames below if column 2 is this identifier column (not verifiable here).
df3$Participant.Enterprise.Identifier <- gsub("-", "", df3$Participant.Enterprise.Identifier)

# Positional renames, applied in one assignment.
# NOTE(review): these indices depend on the exact column order of `df3`;
# confirm them against the input layout whenever the data source changes.
colnames(df3)[c(2, 5, 7, 8, 9, 10)] <- c(
  "peid",
  "CaseNumber",
  "Gender",
  "Race",
  "Ethnicity",
  "ProgramName"
)
colnames(df3)[11 ...