This is an R script which will take two sets of HMIS 5.1 CSVs and produce a combined set.
A few notes:
- A new ExportID will need to be provided.
- Each files are deduplicated based upon the Primary Key (ProjectEntryID, PersonalID, etc.)
- The Project.csv contains a PITCount which is different based upon the date ranges the two data sets were pulled. However, the script takes the maximum of the two PITCounts.
- It requires HMIS_Functions and dplyr.
library(dplyr)
# hmisFunctions <- "/Users/user//Dropbox/HMIS/HMIS_R_Functions/HMIS_R_Functions.R"
# dataPathOne <- "/Users/user//Dropbox/HMIS/R HMIS CSV Set Merger/HMIS Data 10-01-2016 to 10-31-2016"
# dataPathTwo <- "/Users/user//Dropbox/HMIS/R HMIS CSV Set Merger/HMIS Data 11-01-2016 to 11-30-2016"
# pathForCombinedData <- "/Users/user//Dropbox/HMIS/R HMIS CSV Set Merger/"
# nameOfMergedDirectory <- "Merged"
hmisFunctions <- "/Users/user/Dropbox/HMIS/HMIS_R_Functions/HMIS_R_Functions.R"
dataPathOne <- "/Users/user/Dropbox/HMIS/R HMIS CSV Set Merger/HMIS Data 10-01-2016 to 10-31-2016"
dataPathTwo <- "/Users/user/Dropbox/HMIS/R HMIS CSV Set Merger/HMIS Data 11-01-2016 to 11-30-2016"
pathForCombinedData <- "/Users/user/Dropbox/HMIS/R HMIS CSV Set Merger/Merged/"
nameOfMergedDirectory <- "Merged"
newExportID <- "12345"
source(hmisFunctions)
dir.create(file.path(pathForCombinedData, nameOfMergedDirectory), showWarnings = FALSE)
setwd(file.path(pathForCombinedData, nameOfMergedDirectory))
##################################
# Merge Functions #
##################################
mergeHmisCsvsWithUniqueIDAndExport <- function(df1, df2, uniqueIDHeader, exportID){
# Merge the data
mergedDf <- rbind(df1, df2)
mergedDf[,uniqueIDHeader] <- as.factor(mergedDf[,uniqueIDHeader])
# Drop columns which would resist removing duplicates
drops <- c(uniqueIDHeader, "ExportID")
mergedDf <- mergedDf[ , !(names(mergedDf) %in% drops)]
# Get only unique records
mergedDf <- unique(mergedDf)
# Creat a PrimaryKey vector.
xName <- rownames(mergedDf)
# Add Primary Key back.
mergedDf <- cbind(xName=xName, mergedDf)
# Create an ExportID vector.
exportIDVector <- rep(exportID,length(mergedDf$xName))
# Add ExportID vector back.
mergedDf <- cbind(mergedDf, exportIDVector)
# Rename newly created PrimaryKey and ExportID appropriately.
colnames(mergedDf)[ncol(mergedDf)] <- "ExportID"
colnames(mergedDf)[1] <- uniqueIDHeader
mergedDf
}
mergeHmisCsvsWithExportAndPrimaryKey <- function(df1, df2, primaryKey, exportID){
# Merge the data
mergedDf <- rbind(df1, df2)
# Drop columns which would resist removing duplicates
drops <- c("ExportID")
mergedDf <- mergedDf[ , !(names(mergedDf) %in% drops)]
# Get only unique records
#mergedDf <- unique(mergedDf, incomparables = FALSE)
mergedDf <- mergedDf[!duplicated(mergedDf[,c(primaryKey)]),]
# Create an ExportID vector.
exportIDVector <- rep(exportID,length(mergedDf[1]))
# Add ExportID vector back.
mergedDf <- cbind(mergedDf, exportIDVector)
# Rename newly created PrimaryKey and ExportID appropriately.
colnames(mergedDf)[ncol(mergedDf)] <- "ExportID"
mergedDf
}
mergeHmisCsvsWithExportId <- function(df1, df2, exportID){
# Merge the data
mergedDf <- rbind(df1, df2)
# Drop columns which would resist removing duplicates
drops <- c("ExportID")
mergedDf <- unique(mergedDf[ , !(names(mergedDf) %in% drops)])
# Create an ExportID vector.
exportIDVector <- rep(exportID,length(mergedDf[1]))
# Add ExportID vector back.
mergedDf <- cbind(mergedDf, exportIDVector)
# Rename newly created PrimaryKey and ExportID appropriately.
colnames(mergedDf)[ncol(mergedDf)] <- "ExportID"
mergedDf
}
#####################
# Merge Affiliation #
#####################
affiliationOne <- loadAffiliation(dataPathOne)
affiliationTwo <- loadAffiliation(dataPathTwo)
affiliationCombined <- rbind(affiliationOne, affiliationTwo)
affiliationCombined <- unique(affiliationCombined)
remove(list=c("affiliationOne", "affiliationTwo"))
#####################
# Merge Client #
#####################
clientOne <- loadClient(dataPathOne)
clientTwo <- loadClient(dataPathTwo)
clientCombined <- rbind(clientOne, clientTwo)
clientCombined <- unique(clientCombined)
remove(list=c("clientOne", "clientTwo"))
######################
# Merge Disabilities #
######################
disabilitiesOne <- loadDisabilities(dataPathOne)
disabilitiesTwo <- loadDisabilities(dataPathTwo)
disabilitiesCombined <- mergeHmisCsvsWithUniqueIDAndExport(disabilitiesOne, disabilitiesTwo, "DisabilitiesID", newExportID)
remove(list=c("disabilitiesOne", "disabilitiesTwo"))
##################################
# Merge Employment and Education #
##################################
employmentEducationOne <- loadEmployementEducation(dataPathOne)
employmentEducationTwo <- loadEmployementEducation(dataPathTwo)
employmentEducationCombined <- mergeHmisCsvsWithUniqueIDAndExport(employmentEducationOne, employmentEducationTwo, "EmploymentEducationID", newExportID)
remove(list=c("employmentEducationOne", "employmentEducationTwo"))
#####################
# Merge Enrollment #
#####################
enrollmentOne <- loadEnrollment(dataPathOne)
enrollmentTwo <- loadEnrollment(dataPathTwo)
enrollmentCombined <- mergeHmisCsvsWithExportAndPrimaryKey(enrollmentOne, enrollmentTwo, "ProjectEntryID", newExportID)
remove(list=c("enrollmentOne", "enrollmentTwo"))
#######################
# Merge EnrollmentCoC #
#######################
enrollmentCocOne <- loadEnrollmentCoc(dataPathOne)
enrollmentCocTwo <- loadEnrollmentCoc(dataPathTwo)
enrollmentCocCombined <- mergeHmisCsvsWithUniqueIDAndExport(enrollmentCocOne,
enrollmentCocTwo,
"EnrollmentCoCID",
newExportID)
remove(list=c("enrollmentCocOne", "enrollmentCocTwo"))
#####################
# Merge Exit #
#####################
exitOne <- loadExit(dataPathOne)
exitTwo <- loadExit(dataPathTwo)
exitCombined <- mergeHmisCsvsWithExportAndPrimaryKey(exitOne,
exitTwo,
"ExitID",
newExportID)
remove(list=c("exitOne", "exitTwo"))
#####################
# Merge Expot #
#####################
exportOne <- loadExport(dataPathOne)
exportTwo <- loadExport(dataPathTwo)
exportCombined <- exportTwo
remove(list=c("exportOne", "exportTwo"))
#####################
# Merge Funder #
#####################
funderOne <- loadFunder(dataPathOne)
funderTwo <- loadFunder(dataPathTwo)
funderCombined <- mergeHmisCsvsWithExportId(funderOne, funderTwo, newExportID)
remove(list=c("funderOne", "funderTwo"))
#####################
# Merge Health & DV #
#####################
healthAndDVOne <- loadHealthAndDv(dataPathOne)
healthAndDVTwo <- loadHealthAndDv(dataPathTwo)
healthAndDVCombined <- mergeHmisCsvsWithUniqueIDAndExport(healthAndDVOne, healthAndDVTwo, "HealthAndDVID", newExportID)
remove(list=c("healthAndDVOne", "healthAndDVTwo"))
#############################
# Merge Income and Benefits #
#############################
incomeBenefitsOne <- loadIncomeBenefits(dataPathOne)
incomeBenefitsTwo <- loadIncomeBenefits(dataPathTwo)
incomeBenefitsCombined <- mergeHmisCsvsWithUniqueIDAndExport(incomeBenefitsOne,
incomeBenefitsTwo,
"IncomeBenefitsID",
newExportID)
remove(list=c("incomeBenefitsOne", "incomeBenefitsTwo"))
#####################
# Merge Inventory #
#####################
inventoryOne <- loadInventory(dataPathOne)
inventoryTwo <- loadInventory(dataPathTwo)
inventoryCombined <- mergeHmisCsvsWithUniqueIDAndExport(inventoryOne, inventoryTwo,
"InventoryID",
newExportID)
remove(list=c("inventoryOne", "inventoryTwo"))
######################
# Merge Organization #
######################
organizationOne <- loadOrganization(dataPathOne)
organizationTwo <- loadOrganization(dataPathTwo)
organizationCombined <- mergeHmisCsvsWithExportId(organizationOne, organizationTwo, newExportID)
remove(list=c("organizationOne", "organizationTwo"))
#####################
# Merge Project #
#####################
projectOne <- loadProject(dataPathOne)
projectTwo <- loadProject(dataPathTwo)
projectsCombined <- rbind(projectOne, projectTwo)
# Get only the highest PIT Count
projectsCombined <- projectsCombined %>%
group_by(ProjectID) %>%
filter(PITCount==max(PITCount))
# Remove ExportID column for flattening
drops <- c("ExportID")
projectsCombined <- projectsCombined[ , !(names(projectsCombined) %in% drops)]
projectsCombined <- unique(projectsCombined)
# Create an ExportID vector.
exportIDVector <- rep(newExportID,length(projectsCombined$ProjectID))
# Add ExportID vector back.
projectsCombined <- as.data.frame(projectsCombined)
projectsCombined <- cbind(projectsCombined, exportIDVector)
colnames(projectsCombined)[ncol(projectsCombined)] <- "ExportID"
remove(list=c("projectOne", "projectTwo"))
#####################
# Merge Project CoC #
#####################
projectsCoCOne <- loadProjectCoc(dataPathOne)
projectsCoCTwo <- loadProjectCoc(dataPathTwo)
projectCoCCombined <- rbind(projectsCoCOne, projectsCoCTwo)
# Get only the highest PIT Count
projectCoCCombined <- projectCoCCombined %>%
group_by(ProjectID) %>%
filter(PITCount==max(PITCount))
# Remove ExportID column for flattening
drops <- c("ExportID")
projectCoCCombined <- projectCoCCombined[ , !(names(projectCoCCombined) %in% drops)]
projectCoCCombined <- unique(projectCoCCombined)
# Create an ExportID vector.
exportIDVector <- rep(newExportID,length(projectCoCCombined$ProjectID))
# Add ExportID vector back.
projectCoCCombined <- as.data.frame(projectCoCCombined)
projectCoCCombined <- cbind(projectCoCCombined, exportIDVector)
colnames(projectCoCCombined)[ncol(projectCoCCombined)] <- "ExportID"
remove(list=c("projectsCoCOne", "projectsCoCTwo"))
#####################
# Merge Services #
#####################
servicesOne <- loadServices(dataPathOne)
servicesTwo <- loadServices(dataPathTwo)
servicesCombined <- mergeHmisCsvsWithUniqueIDAndExport(servicesOne, servicesTwo, "ServicesID", newExportID)
remove(list=c("servicesOne", "servicesTwo"))
#####################
# Merge Site #
#####################
siteOne <- loadSite(dataPathOne)
siteTwo <- loadSite(dataPathTwo)
siteCombined <- mergeHmisCsvsWithExportId(siteOne, siteTwo, newExportID)
remove(list=c("siteOne", "siteTwo"))
############################
# Write combined HMIS CSVs #
############################
write.csv(affiliationCombined, file = paste(pathForCombinedData, "Affiliation.csv", sep=""), na = "", row.names = FALSE)
write.csv(clientCombined, file = paste(pathForCombinedData, "Client.csv", sep=""), na = "", row.names = FALSE)
write.csv(disabilitiesCombined, file = paste(pathForCombinedData, "Disabilities.csv", sep=""), na = "", row.names = FALSE)
write.csv(employmentEducationCombined, file = paste(pathForCombinedData, "EmploymentEducation.csv", sep=""), na = "", row.names = FALSE)
write.csv(enrollmentCombined, file = paste(pathForCombinedData, "Enrollment.csv", sep=""), na = "", row.names = FALSE)
write.csv(enrollmentCocCombined, file = paste(pathForCombinedData, "EnrollmentCoC.csv", sep=""), na = "", row.names = FALSE)
write.csv(exitCombined, file = paste(pathForCombinedData, "Exit.csv", sep=""), na = "", row.names = FALSE)
write.csv(exportCombined, file = paste(pathForCombinedData, "Export.csv", sep=""), na = "", row.names = FALSE)
write.csv(funderCombined, file = paste(pathForCombinedData, "Funder.csv", sep=""), na = "", row.names = FALSE)
write.csv(healthAndDVCombined, file = paste(pathForCombinedData, "HealthAndDV.csv", sep=""), na = "", row.names = FALSE)
write.csv(incomeBenefitsCombined, file = paste(pathForCombinedData, "IncomeBenefits.csv", sep=""), na = "", row.names = FALSE)
write.csv(inventoryCombined, file = paste(pathForCombinedData, "Inventory.csv", sep=""), na = "", row.names = FALSE)
write.csv(organizationCombined, file = paste(pathForCombinedData, "Organization.csv", sep=""), na = "", row.names = FALSE)
write.csv(projectsCombined, file = paste(pathForCombinedData, "Project.csv", sep=""), na = "", row.names = FALSE)
write.csv(projectCoCCombined, file = paste(pathForCombinedData, "ProjectCoC.csv", sep=""), na = "", row.names = FALSE)
write.csv(servicesCombined, file = paste(pathForCombinedData, "Services.csv", sep=""), na = "", row.names = FALSE)
write.csv(siteCombined, file = paste(pathForCombinedData, "Site.csv", sep=""), na = "", row.names = FALSE)