# IRS 1023 DATA ----
# Load the IRS 1023-EZ missions data set from a local RDS file.
#
# The path defaults to the original local copy. To run the script on another
# machine, set the environment variable IRS_1023_MISSIONS_RDS to the location
# of IRS-1023-EZ-MISSIONS.rds instead of editing this file.
mission_rds_path <- Sys.getenv("IRS_1023_MISSIONS_RDS", unset = "")
if (!nzchar(mission_rds_path)) {
  # Variable unset or empty: fall back to the original hard-coded location.
  mission_rds_path <- "/Users/Tyler/Desktop/thatsallfolks/TIY/website_v1/WebsiteAssets/Data/IRS-1023-EZ-MISSIONS.rds"
}
if (!file.exists(mission_rds_path)) {
  # Fail early with an actionable message rather than a bare connection error.
  stop(
    "Mission data not found at '", mission_rds_path, "'. ",
    "Set IRS_1023_MISSIONS_RDS to the location of IRS-1023-EZ-MISSIONS.rds.",
    call. = FALSE
  )
}
dat <- readRDS(mission_rds_path)

# Remote copy (raw GitHub URL). It may no longer be functioning, so it is kept
# for reference only and is not used:
# URL <- "https://raw.githubusercontent.com/DS4PS/pe4ps-textbook/master/data/org-mission-statements.rds"
# dat <- readRDS(url(URL))

# Quick check: first rows of the organisation name, code definition and
# mission columns.
head(dat[c("orgname", "codedef01", "mission")])
# Part 1: Patterns
# Example pattern: the general shape of a grep() call. With value = TRUE,
# grep() returns the matching strings themselves rather than their indices.
grep("some.reg.ex", "mission statements", value = TRUE)
# Missions with Numbers
# Show the first few mission statements that contain at least one digit.
pander(head(grep("[0-9]", dat$mission, value = TRUE)))
# Count how many mission statements contain a digit.
sum(grepl("[0-9]", dat$mission))
# 1. Filter by: Starts with "to"
# Mission statements whose text begins with the lowercase letters "to"
# (the ^ anchor pins the match to the start of the string).
filter_to <- grep(pattern = "^to", x = dat$mission, value = TRUE)
# Preview the first six matches.
pander(head(filter_to, n = 6))
# Number of mission statements that start with "to".
sum(grepl(pattern = "^to", x = dat$mission))
# 2. Filter by: blank mission statements
# Positions (indices, not text) of mission statements that are empty or
# consist solely of whitespace.
filter_blank <- grep(pattern = "^[[:space:]]*$", x = dat$mission)
# Preview the first six positions.
head(filter_blank, n = 6)
# Number of blank mission statements.
sum(grepl(pattern = "^[[:space:]]*$", x = dat$mission))
# 3. Filter by: Mission statements with trailing whitespace
# Logical flag per mission statement: TRUE when the text ends in one or more
# whitespace characters.
filter_blank_end <- grepl(pattern = "[[:space:]]+$", x = dat$mission)
# Number of mission statements with trailing whitespace.
sum(filter_blank_end)
#' Remove trailing whitespace from each string.
#'
#' @param x Character vector to clean; defaults to the `mission` column of
#'   the global `dat` object.
#' @return `x` with any run of whitespace at the end of each element removed.
trim_final_spaces <- function(x = dat$mission) {
  sub(pattern = "\\s+$", replacement = "", x = x)
}
# Preview the first six mission statements after trailing whitespace is removed.
pander(head(trim_final_spaces(), 6))
# 4. Filter by: Missions with a dollar sign
# Logical flag per mission statement: TRUE when the text contains a literal
# dollar sign (escaped, because a bare $ is the end-of-string anchor).
filter_dollar <- grepl(pattern = "\\$", x = dat$mission)
# Number of mission statements that mention a dollar sign.
sum(filter_dollar)