Code
# Setup ----
# r/libraries.r is sourced from the project root; presumably it attaches the
# packages used throughout this script (tidyverse, fs, vroom, gt, skimr, ...)
# -- TODO confirm. The two JSON packages are attached explicitly.
source(here::here("r", "libraries.r"))
library(jsonlite)
library(tidyjson)

# Local folder that holds the taxdata and PolicyEngine files.
tddir <- r"(E:\data\taxdata-psl)"

# URL of tax-calculator variables json file
url <- "https://raw.githubusercontent.com/PSLmodels/Tax-Calculator/master/taxcalc/records_variables.json"

# URL of the gzipped PolicyEngine flat file
url_pe_flat <- "https://github.com/PSLmodels/tax-microdata-benchmarking/releases/download/policyengine-cps-flat-file/tax_microdata.csv.gz"

# WinRAR executable, used further down to unpack the flat file
wrp <- r"(c:\Program Files\WinRAR\WinRAR.exe)"

# Local destination of the downloaded PolicyEngine flat file
pez <- path(tddir, "tax_microdata.csv.gz")
# #| code-fold: true
# update this if taxdata variables change
# Read the Tax-Calculator variable definitions from `url`; the parsed object
# has (at least) a `read` and a `calc` element, each a named list of variables.
jsdata <- fromJSON(url)
names(jsdata)
jsdata$read # 106
jsdata$calc # 103

# Stack both groups into one table:
#   - vname comes from the names of the list column,
#   - unnest_wider() spreads each variable's fields into columns,
#   - unnest_longer() gives one row per `form` entry, whose id column
#     (form_id) is renamed to formyears.
allvars <- bind_rows(
  tibble(vtype = "read", lst = jsdata$read),
  tibble(vtype = "calc", lst = jsdata$calc)
) |>
  mutate(vname = names(lst)) |>
  unnest_wider(col = lst) |>
  unnest_longer(col = form) |>
  rename(formyears = form_id) |>
  relocate(vname)

# Quick looks at the result.
glimpse(allvars)
allvars |> count(vtype)
allvars |> count(type)
allvars |> count(availability)

# Cache the table inside the project so later steps need no network access.
saveRDS(allvars, here::here("data", "tcvars.rds"))
# only update when the pe flatfile changes
# Fetch the gzipped PolicyEngine flat file; mode = "wb" writes it as binary.
download.file(url = url_pe_flat, destfile = pez, mode = "wb")

# Reload the cached Tax-Calculator variable table and reduce it to one row
# per (vname, vtype, desc) combination.
tcvars <- readRDS(here::here("data", "tcvars.rds"))
# ht(tcvars)
utcvars <- tcvars |>
  select(vname, vtype, desc) |>
  distinct()

# get frozen tax-calculator file for 2023 ----
tdfn <- "tc23.csv"
tdpath <- path(tddir, tdfn)
tddf <- vroom(tdpath) # Rows: 252868 Columns: 209
# glimpse(tddf)
# get the file modification date of the csv file using winrar (wrp)
# because that will not change the date the way other utilities do
# un-winrar the file
# WinRAR switches: x = extract, -ibck = run in background, -o+ = overwrite
# existing files; "*.*" selects every archive member and the last argument is
# the destination folder.
command <- sprintf('"%s" x -ibck -o+ "%s" "*.*" "%s"', wrp, pez, tddir)
rar_status <- system(command, wait = TRUE)
if (rar_status != 0L) {
  # Surface a failed or partial extraction instead of silently reading the
  # date of a stale csv left over from an earlier run.
  warning("WinRAR exited with status ", rar_status, " while extracting ", pez,
          call. = FALSE)
}

# now that we have the file unzipped, get its date
pefn <- "tax_microdata.csv"
pepath <- path(tddir, pefn)
stopifnot(file.exists(pepath)) # the extracted csv must exist to have a date
pedata <- file_info(pepath)
# glimpse(pedata)
pedate <- pedata$modification_time
# we can delete the csv file if we want

## get the pe data directly from the gz file ----
pedf <- vroom(pez) # read directly from the gz file Rows: 155312 Columns: 62
# glimpse(pedf)

This file examines the version of the Policy Engine flat file created on 2024-02-20 09:35:51.
# Nikhil fixed this
# Column names found in the PE file but not in the taxdata file, sorted.
# Earlier PE versions returned:
#   "e00800p" "e00800s" "e01500p" "e01500s" "e02300p" "e02300s" "e02400p" "e02400s"
# Result for the current file: character(0)
sort(setdiff(names(pedf), names(tddf)))
Note: Prior versions of the PE flat file included variables that were not in taxdata (`e00800p`, `e00800s`, `e01500p`, `e01500s`, `e02300p`, `e02300s`, `e02400p`, `e02400s`). It appears that they have now been removed.

# Tax-Calculator "read" variables that have no column in the PE flat file,
# listed alphabetically with their descriptions and rendered as a gt table.
utcvars |>
  filter(
    vtype == "read",
    !vname %in% names(pedf)
  ) |>
  select(vname, desc) |>
  arrange(vname) |>
  gt()

| vname | desc |
|---|---|
| MIDR | 1 if separately filing spouse itemizes; otherwise 0 |
| PT_SSTB_income | Value of one implies business income is from a specified service trade or business (SSTB); value of zero implies business income is from a qualified trade or business |
| PT_binc_w2_wages | Filing unit's share of total W-2 wages paid by the pass-through business |
| PT_ubia_property | Filing unit's share of total business property owned by the pass-through business |
| a_lineno | CPS line number for the person record of the head of the tax filing unit (not used in tax-calculation logic) |
| agi_bin | Historical AGI category used in data extrapolation |
| cmbtp | Estimate of income on (AMT) Form 6251 but not in AGI |
| data_source | 1 if unit is created primarily from IRS-SOI PUF data; 0 if created primarily from CPS data (not used in tax-calculation logic) |
| e00700 | Taxable refunds of state and local income taxes |
| e01200 | Other net gain/loss from Form 4797 |
| e02000 | Sch E total rental, royalty, partnership, S-corporation, etc, income/loss (includes e26270 and e27200) |
| e03220 | Educator expenses |
| e03230 | Tuition and fees from Form 8917 |
| e03240 | Domestic production activities from Form 8903 |
| e03290 | Health savings account deduction from Form 8889 |
| e03300 | Contributions to SEP, SIMPLE and qualified plans |
| e03400 | Penalty on early withdrawal of savings |
| e03500 | Alimony paid |
| e07240 | Retirement savings contributions credit from Form 8880 |
| e07260 | Residential energy credit from Form 5695 |
| e07300 | Foreign tax credit from Form 1116 |
| e07400 | General business credit from Form 3800 |
| e07600 | Prior year minimum tax credit from Form 8801 |
| e09700 | Recapture of Investment Credit |
| e09800 | Unreported payroll taxes from Form 4137 or 8919 |
| e09900 | Penalty tax on qualified retirement plans |
| e11200 | Excess payroll (FICA/RRTA) tax withheld |
| e18500 | Itemizable real-estate taxes paid |
| e19200 | Itemizable interest paid |
| e19800 | Itemizable charitable giving: cash/check contributions. WARNING: this variable is already capped in PUF data. |
| e20100 | Itemizable charitable giving: other than cash/check contributions. WARNING: this variable is already capped in PUF data. |
| e20400 | Itemizable miscellaneous deductions. WARNING: this variable is zero below the floor in PUF data. |
| e24515 | Sch D: Un-Recaptured Section 1250 Gain |
| e24518 | Sch D: 28% Rate Gain or Loss |
| e26270 | Sch E: Combined partnership and S-corporation net income/loss (includes k1bx14p and k1bx14s amounts and is included in e02000) |
| e27200 | Sch E: Farm rent net income or loss (included in e02000) |
| e58990 | Investment income elected amount from Form 4952 |
| e62900 | Alternative Minimum Tax foreign tax credit from Form 6251 |
| e87521 | Total tentative AmOppCredit amount for all students |
| e87530 | Adjusted qualified lifetime learning expenses for all students |
| f6251 | 1 if Form 6251 (AMT) attached to return; otherwise 0 |
| ffpos | CPS family identifier within household (not used in tax-calculation logic) |
| g20500 | Itemizable gross (before 10% AGI disregard) casualty or theft loss |
| h_seq | CPS household sequence number (not used in tax-calculation logic) |
| k1bx14p | Partner self-employment earnings/loss for taxpayer (included in e26270 total) |
| k1bx14s | Partner self-employment earnings/loss for spouse (included in e26270 total) |
| mcare_ben | Imputed Medicare benefits expressed as the actuarial value of Medicare health insurance |
| other_ben | Non-imputed benefits |
| p08000 | Other tax credits (but not including Sch R credit) |
| vet_ben | Imputed Veteran's benefits |
# Columns present in both files; only these are kept so the two sources can
# be stacked with identical layouts.
goodvars <- intersect(names(tddf), names(pedf))

# Stack taxdata (src = "td") on top of PolicyEngine (src = "pe").
petdstack <- bind_rows(
  tddf |> select(any_of(goodvars)) |> mutate(src = "td"),
  pedf |> select(any_of(goodvars)) |> mutate(src = "pe")
)
skim(petdstack) # good, no missing values

| Name | petdstack |
| Number of rows | 408180 |
| Number of columns | 57 |
| _______________________ | |
| Column type frequency: | |
| character | 1 |
| numeric | 56 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| src | 0 | 1 | 2 | 2 | 0 | 2 | 0 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| DSI | 0 | 1 | 0.02 | 0.14 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
| EIC | 0 | 1 | 0.28 | 0.70 | 0.00 | 0.00 | 0.00 | 0.00 | 3.00 | ▇▁▁▁▁ |
| FLPDYR | 0 | 1 | 2023.38 | 0.49 | 2023.00 | 2023.00 | 2023.00 | 2024.00 | 2024.00 | ▇▁▁▁▅ |
| MARS | 0 | 1 | 1.86 | 1.02 | 1.00 | 1.00 | 2.00 | 2.00 | 5.00 | ▇▇▁▂▁ |
| RECID | 0 | 1 | 20852996.64 | 40660279.83 | 1.00 | 101397.75 | 202680.50 | 6319501.25 | 108897800.00 | ▇▁▁▁▂ |
| XTOT | 0 | 1 | 2.02 | 1.28 | 0.00 | 1.00 | 2.00 | 3.00 | 13.00 | ▇▃▁▁▁ |
| age_head | 0 | 1 | 47.54 | 17.51 | 0.00 | 33.00 | 47.00 | 61.00 | 85.00 | ▁▇▇▇▃ |
| age_spouse | 0 | 1 | 21.24 | 25.90 | 0.00 | 0.00 | 0.00 | 45.00 | 96.00 | ▇▂▃▂▁ |
| blind_head | 0 | 1 | 0.02 | 0.13 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
| blind_spouse | 0 | 1 | 0.00 | 0.05 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
| e00200 | 0 | 1 | 285514.70 | 1833040.88 | 0.00 | 5598.14 | 45591.53 | 117855.65 | 121494256.00 | ▇▁▁▁▁ |
| e00200p | 0 | 1 | 164137.92 | 1202918.17 | 0.00 | 0.00 | 31479.86 | 78001.83 | 105560584.00 | ▇▁▁▁▁ |
| e00200s | 0 | 1 | 121376.79 | 1119019.36 | 0.00 | 0.00 | 0.00 | 15817.47 | 121494256.00 | ▇▁▁▁▁ |
| e00300 | 0 | 1 | 19593.95 | 380849.98 | -7974000.00 | 0.00 | 0.00 | 236.06 | 75919241.73 | ▇▁▁▁▁ |
| e00400 | 0 | 1 | 9969.80 | 112507.90 | 0.00 | 0.00 | 0.00 | 0.00 | 15409925.50 | ▇▁▁▁▁ |
| e00600 | 0 | 1 | 49718.29 | 700665.65 | 0.00 | 0.00 | 0.00 | 189.99 | 98956000.00 | ▇▁▁▁▁ |
| e00650 | 0 | 1 | 38939.43 | 641371.90 | 0.00 | 0.00 | 0.00 | 80.00 | 95590000.00 | ▇▁▁▁▁ |
| e00800 | 0 | 1 | 45.15 | 1849.89 | 0.00 | 0.00 | 0.00 | 0.00 | 277528.49 | ▇▁▁▁▁ |
| e00900 | 0 | 1 | 13491.49 | 251457.62 | -43485522.52 | 0.00 | 0.00 | 0.00 | 26459762.31 | ▁▁▁▇▁ |
| e00900p | 0 | 1 | 11730.97 | 244948.36 | -43485522.52 | 0.00 | 0.00 | 0.00 | 26459762.31 | ▁▁▁▇▁ |
| e00900s | 0 | 1 | 1760.51 | 48947.77 | -7877854.08 | 0.00 | 0.00 | 0.00 | 10220239.95 | ▁▁▇▁▁ |
| e01100 | 0 | 1 | 41.78 | 2841.44 | 0.00 | 0.00 | 0.00 | 0.00 | 934299.43 | ▇▁▁▁▁ |
| e01400 | 0 | 1 | 6554.49 | 97809.10 | 0.00 | 0.00 | 0.00 | 0.00 | 10246224.44 | ▇▁▁▁▁ |
| e01500 | 0 | 1 | 30633.05 | 328754.94 | 0.00 | 0.00 | 0.00 | 0.00 | 75570690.19 | ▇▁▁▁▁ |
| e01700 | 0 | 1 | 6264.72 | 45952.23 | 0.00 | 0.00 | 0.00 | 0.00 | 5627129.48 | ▇▁▁▁▁ |
| e02100 | 0 | 1 | 16.52 | 55521.00 | -10218677.07 | 0.00 | 0.00 | 0.00 | 10730000.00 | ▁▁▇▁▁ |
| e02100p | 0 | 1 | 9.37 | 54293.24 | -10218677.07 | 0.00 | 0.00 | 0.00 | 10730000.00 | ▁▁▇▁▁ |
| e02100s | 0 | 1 | 7.16 | 10015.31 | -3391138.88 | 0.00 | 0.00 | 0.00 | 1502646.79 | ▁▁▁▇▁ |
| e02300 | 0 | 1 | 103.79 | 755.35 | 0.00 | 0.00 | 0.00 | 0.00 | 50000.00 | ▇▁▁▁▁ |
| e02400 | 0 | 1 | 5325.16 | 12170.05 | 0.00 | 0.00 | 0.00 | 0.00 | 146110.40 | ▇▁▁▁▁ |
| e03150 | 0 | 1 | 125.68 | 1301.94 | 0.00 | 0.00 | 0.00 | 0.00 | 19139.90 | ▇▁▁▁▁ |
| e03210 | 0 | 1 | 51.87 | 368.97 | 0.00 | 0.00 | 0.00 | 0.00 | 3987.48 | ▇▁▁▁▁ |
| e03270 | 0 | 1 | 751.21 | 4039.89 | 0.00 | 0.00 | 0.00 | 0.00 | 88881.02 | ▇▁▁▁▁ |
| e17500 | 0 | 1 | 2834.25 | 10849.17 | 0.00 | 0.00 | 0.00 | 1077.39 | 1383698.38 | ▇▁▁▁▁ |
| e18400 | 0 | 1 | 27982.48 | 232567.40 | -5675.82 | 0.00 | 0.00 | 2056.38 | 24180068.87 | ▇▁▁▁▁ |
| e32800 | 0 | 1 | 171.10 | 973.35 | 0.00 | 0.00 | 0.00 | 0.00 | 9569.95 | ▇▁▁▁▁ |
| elderly_dependents | 0 | 1 | 0.00 | 0.01 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
| f2441 | 0 | 1 | 0.17 | 0.56 | 0.00 | 0.00 | 0.00 | 0.00 | 7.00 | ▇▁▁▁▁ |
| fips | 0 | 1 | 27.33 | 16.22 | 1.00 | 12.00 | 27.00 | 41.00 | 56.00 | ▇▅▅▆▆ |
| housing_ben | 0 | 1 | 66.90 | 753.21 | 0.00 | 0.00 | 0.00 | 0.00 | 31424.64 | ▇▁▁▁▁ |
| mcaid_ben | 0 | 1 | 837.15 | 4644.91 | 0.00 | 0.00 | 0.00 | 0.00 | 144528.75 | ▇▁▁▁▁ |
| n1820 | 0 | 1 | 0.08 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 4.00 | ▇▁▁▁▁ |
| n21 | 0 | 1 | 1.41 | 0.59 | 0.00 | 1.00 | 1.00 | 2.00 | 6.00 | ▇▆▁▁▁ |
| n24 | 0 | 1 | 0.47 | 0.89 | 0.00 | 0.00 | 0.00 | 1.00 | 9.00 | ▇▁▁▁▁ |
| nu06 | 0 | 1 | 0.16 | 0.47 | 0.00 | 0.00 | 0.00 | 0.00 | 6.00 | ▇▁▁▁▁ |
| nu13 | 0 | 1 | 0.38 | 0.79 | 0.00 | 0.00 | 0.00 | 0.00 | 7.00 | ▇▁▁▁▁ |
| nu18 | 0 | 1 | 0.55 | 0.95 | 0.00 | 0.00 | 0.00 | 1.00 | 10.00 | ▇▁▁▁▁ |
| p22250 | 0 | 1 | -5063.82 | 1028508.44 | -412783864.54 | 0.00 | 0.00 | 0.00 | 130246694.17 | ▁▁▁▇▁ |
| p23250 | 0 | 1 | 320028.59 | 3617295.26 | -93066402.13 | 0.00 | 0.00 | 0.00 | 301474332.45 | ▁▇▁▁▁ |
| pencon_p | 0 | 1 | 2097.61 | 5359.55 | 0.00 | 0.00 | 0.00 | 1200.00 | 680000.00 | ▇▁▁▁▁ |
| pencon_s | 0 | 1 | 1277.70 | 4357.63 | 0.00 | 0.00 | 0.00 | 0.00 | 130000.00 | ▇▁▁▁▁ |
| s006 | 0 | 1 | 974.95 | 1293.27 | 0.00 | 15.67 | 522.83 | 1400.56 | 18203.98 | ▇▁▁▁▁ |
| snap_ben | 0 | 1 | 120.25 | 798.28 | 0.00 | 0.00 | 0.00 | 0.00 | 27992.40 | ▇▁▁▁▁ |
| ssi_ben | 0 | 1 | 108.60 | 1088.74 | 0.00 | 0.00 | 0.00 | 0.00 | 39612.00 | ▇▁▁▁▁ |
| tanf_ben | 0 | 1 | 17.68 | 387.56 | 0.00 | 0.00 | 0.00 | 0.00 | 25776.50 | ▇▁▁▁▁ |
| wic_ben | 0 | 1 | 6.63 | 112.96 | 0.00 | 0.00 | 0.00 | 0.00 | 5073.95 | ▇▁▁▁▁ |
# save stacked file ----
# tddir is an absolute data folder outside the project, so the path is built
# with path() (as everywhere else in this script) rather than here::here(),
# which is meant for project-relative paths.
petdstack |>
  write_csv(path(tddir, "scratch", "petdstack.csv"))

# save td file without calc variables ----
# Keep only Tax-Calculator "read" variables that actually exist in tddf.
tdkeepvars <- utcvars |>
  filter(
    vtype == "read",
    vname %in% names(tddf)
  )
tdall <- tddf |>
  select(any_of(tdkeepvars$vname)) |>
  mutate(src = "tdall")
skim(tdall)

| Name | tdall |
| Number of rows | 252868 |
| Number of columns | 107 |
| _______________________ | |
| Column type frequency: | |
| character | 1 |
| numeric | 106 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| src | 0 | 1 | 5 | 5 | 0 | 1 | 0 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| DSI | 0 | 1 | 0.03 | 0.18 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
| EIC | 0 | 1 | 0.17 | 0.56 | 0.00 | 0.00 | 0.00 | 0.00 | 3.00 | ▇▁▁▁▁ |
| FLPDYR | 0 | 1 | 2023.00 | 0.00 | 2023.00 | 2023.00 | 2023.00 | 2023.00 | 2023.00 | ▁▁▇▁▁ |
| MARS | 0 | 1 | 1.80 | 0.88 | 1.00 | 1.00 | 2.00 | 2.00 | 4.00 | ▇▇▁▁▂ |
| MIDR | 0 | 1 | 0.01 | 0.08 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
| RECID | 0 | 1 | 126434.50 | 72996.85 | 1.00 | 63217.75 | 126434.50 | 189651.25 | 252868.00 | ▇▇▇▇▇ |
| XTOT | 0 | 1 | 2.11 | 1.30 | 0.00 | 1.00 | 2.00 | 3.00 | 10.00 | ▇▃▁▁▁ |
| age_head | 0 | 1 | 46.09 | 16.44 | 1.00 | 33.00 | 46.00 | 59.00 | 85.00 | ▁▇▇▇▂ |
| age_spouse | 0 | 1 | 23.29 | 26.29 | 0.00 | 0.00 | 0.00 | 48.00 | 96.00 | ▇▂▃▂▁ |
| agi_bin | 0 | 1 | 7.38 | 4.62 | 0.00 | 3.00 | 8.00 | 11.00 | 18.00 | ▇▆▆▇▂ |
| blind_head | 0 | 1 | 0.01 | 0.11 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
| blind_spouse | 0 | 1 | 0.00 | 0.05 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
| cmbtp | 0 | 1 | 27175.40 | 831505.44 | -38029107.62 | 0.00 | 0.00 | 0.00 | 132950663.52 | ▁▇▁▁▁ |
| e00200 | 0 | 1 | 241188.14 | 1153830.96 | 0.00 | 201.60 | 39078.45 | 118941.16 | 87662890.23 | ▇▁▁▁▁ |
| e00200p | 0 | 1 | 145270.13 | 770865.52 | 0.00 | 0.00 | 23105.91 | 70713.39 | 43831445.12 | ▇▁▁▁▁ |
| e00200s | 0 | 1 | 95918.01 | 626005.05 | 0.00 | 0.00 | 0.00 | 22371.64 | 43831445.12 | ▇▁▁▁▁ |
| pencon_p | 0 | 1 | 2619.34 | 5972.99 | 0.00 | 0.00 | 0.00 | 0.00 | 25587.08 | ▇▁▁▁▁ |
| pencon_s | 0 | 1 | 1720.10 | 5238.42 | 0.00 | 0.00 | 0.00 | 0.00 | 25587.08 | ▇▁▁▁▁ |
| e00300 | 0 | 1 | 23276.97 | 423509.43 | 0.00 | 0.00 | 0.00 | 302.30 | 75919241.73 | ▇▁▁▁▁ |
| e00400 | 0 | 1 | 10464.08 | 114433.60 | 0.00 | 0.00 | 0.00 | 0.00 | 15409925.50 | ▇▁▁▁▁ |
| e00600 | 0 | 1 | 43898.22 | 566942.77 | 0.00 | 0.00 | 0.00 | 436.98 | 70391623.86 | ▇▁▁▁▁ |
| e00650 | 0 | 1 | 33349.35 | 505240.65 | 0.00 | 0.00 | 0.00 | 208.99 | 70391623.86 | ▇▁▁▁▁ |
| e00700 | 0 | 1 | 2924.51 | 35725.19 | 0.00 | 0.00 | 0.00 | 0.00 | 4584005.14 | ▇▁▁▁▁ |
| e00800 | 0 | 1 | 58.01 | 2140.94 | 0.00 | 0.00 | 0.00 | 0.00 | 277528.49 | ▇▁▁▁▁ |
| e00900 | 0 | 1 | 15515.39 | 270021.29 | -43485522.52 | 0.00 | 0.00 | 0.00 | 26459762.31 | ▁▁▁▇▁ |
| e00900p | 0 | 1 | 13107.81 | 260937.16 | -43485522.52 | 0.00 | 0.00 | 0.00 | 26459762.31 | ▁▁▁▇▁ |
| e00900s | 0 | 1 | 2407.59 | 59050.15 | -7877854.08 | 0.00 | 0.00 | 0.00 | 10220239.95 | ▁▁▇▁▁ |
| e01100 | 0 | 1 | 67.45 | 3609.85 | 0.00 | 0.00 | 0.00 | 0.00 | 934299.43 | ▇▁▁▁▁ |
| e01200 | 0 | 1 | -6956.64 | 453889.80 | -67056722.27 | 0.00 | 0.00 | 0.00 | 44120613.22 | ▁▁▁▇▁ |
| e01400 | 0 | 1 | 10580.28 | 124096.28 | 0.00 | 0.00 | 0.00 | 0.00 | 10246224.44 | ▇▁▁▁▁ |
| e01500 | 0 | 1 | 47782.30 | 416591.35 | 0.00 | 0.00 | 0.00 | 0.00 | 75570690.19 | ▇▁▁▁▁ |
| e01700 | 0 | 1 | 8446.89 | 57047.66 | 0.00 | 0.00 | 0.00 | 0.00 | 5627129.48 | ▇▁▁▁▁ |
| e02000 | 0 | 1 | 134290.17 | 1349169.19 | -52824630.63 | 0.00 | 0.00 | 0.00 | 105487980.91 | ▁▇▁▁▁ |
| e02100 | 0 | 1 | -414.22 | 56357.39 | -10218677.07 | 0.00 | 0.00 | 0.00 | 3585758.79 | ▁▁▁▇▁ |
| e02100p | 0 | 1 | -407.58 | 54439.54 | -10218677.07 | 0.00 | 0.00 | 0.00 | 3585758.79 | ▁▁▁▇▁ |
| e02100s | 0 | 1 | -6.65 | 12614.12 | -3391138.88 | 0.00 | 0.00 | 0.00 | 1502646.79 | ▁▁▁▇▁ |
| e02300 | 0 | 1 | 116.95 | 589.74 | 0.00 | 0.00 | 0.00 | 0.00 | 22930.01 | ▇▁▁▁▁ |
| e02400 | 0 | 1 | 5120.01 | 12364.86 | 0.00 | 0.00 | 0.00 | 0.00 | 146110.40 | ▇▁▁▁▁ |
| e03150 | 0 | 1 | 202.87 | 1649.39 | 0.00 | 0.00 | 0.00 | 0.00 | 19139.90 | ▇▁▁▁▁ |
| e03210 | 0 | 1 | 83.73 | 465.93 | 0.00 | 0.00 | 0.00 | 0.00 | 3987.48 | ▇▁▁▁▁ |
| e03220 | 0 | 1 | 8.97 | 62.36 | 0.00 | 0.00 | 0.00 | 0.00 | 797.50 | ▇▁▁▁▁ |
| e03230 | 0 | 1 | 38.48 | 442.84 | 0.00 | 0.00 | 0.00 | 0.00 | 6379.97 | ▇▁▁▁▁ |
| e03240 | 0 | 1 | 2961.42 | 50187.76 | 0.00 | 0.00 | 0.00 | 0.00 | 7617678.69 | ▇▁▁▁▁ |
| e03270 | 0 | 1 | 1212.60 | 5077.93 | 0.00 | 0.00 | 0.00 | 0.00 | 88881.02 | ▇▁▁▁▁ |
| e03290 | 0 | 1 | 119.61 | 959.61 | 0.00 | 0.00 | 0.00 | 0.00 | 12402.00 | ▇▁▁▁▁ |
| e03300 | 0 | 1 | 2351.33 | 18756.94 | 0.00 | 0.00 | 0.00 | 0.00 | 754271.41 | ▇▁▁▁▁ |
| e03400 | 0 | 1 | 12.20 | 1917.03 | 0.00 | 0.00 | 0.00 | 0.00 | 861933.33 | ▇▁▁▁▁ |
| e03500 | 0 | 1 | 49.36 | 1492.34 | 0.00 | 0.00 | 0.00 | 0.00 | 166995.59 | ▇▁▁▁▁ |
| e07240 | 0 | 1 | 9.26 | 71.48 | 0.00 | 0.00 | 0.00 | 0.00 | 3189.98 | ▇▁▁▁▁ |
| e07260 | 0 | 1 | 61.29 | 2170.94 | 0.00 | 0.00 | 0.00 | 0.00 | 447395.07 | ▇▁▁▁▁ |
| e07300 | 0 | 1 | 3639.95 | 75644.14 | 0.00 | 0.00 | 0.00 | 0.00 | 9982719.22 | ▇▁▁▁▁ |
| e07400 | 0 | 1 | 645.60 | 19296.17 | 0.00 | 0.00 | 0.00 | 0.00 | 4253977.67 | ▇▁▁▁▁ |
| e07600 | 0 | 1 | 117.70 | 4143.98 | 0.00 | 0.00 | 0.00 | 0.00 | 822537.04 | ▇▁▁▁▁ |
| e09700 | 0 | 1 | 1.90 | 331.33 | 0.00 | 0.00 | 0.00 | 0.00 | 133500.78 | ▇▁▁▁▁ |
| e09800 | 0 | 1 | 0.14 | 9.34 | 0.00 | 0.00 | 0.00 | 0.00 | 1260.04 | ▇▁▁▁▁ |
| e09900 | 0 | 1 | 96.17 | 1919.56 | 0.00 | 0.00 | 0.00 | 0.00 | 542137.56 | ▇▁▁▁▁ |
| e11200 | 0 | 1 | 102.84 | 864.85 | 0.00 | 0.00 | 0.00 | 0.00 | 45935.75 | ▇▁▁▁▁ |
| e17500 | 0 | 1 | 1270.37 | 10623.11 | 0.00 | 0.00 | 0.00 | 0.00 | 1065056.49 | ▇▁▁▁▁ |
| e18400 | 0 | 1 | 37128.87 | 266267.84 | 0.00 | 0.00 | 0.00 | 6029.07 | 24180068.87 | ▇▁▁▁▁ |
| e18500 | 0 | 1 | 7398.76 | 22441.88 | 0.00 | 0.00 | 0.00 | 5726.02 | 922543.00 | ▇▁▁▁▁ |
| e19200 | 0 | 1 | 8356.57 | 47032.81 | 0.00 | 0.00 | 0.00 | 6626.29 | 5758134.85 | ▇▁▁▁▁ |
| e19800 | 0 | 1 | 15275.12 | 226380.38 | 0.00 | 0.00 | 0.00 | 1882.09 | 48009239.65 | ▇▁▁▁▁ |
| e20100 | 0 | 1 | 6759.27 | 240670.30 | 0.00 | 0.00 | 0.00 | 0.00 | 47179844.15 | ▇▁▁▁▁ |
| e20400 | 0 | 1 | 9230.92 | 97205.48 | 0.00 | 0.00 | 0.00 | 159.50 | 17305656.15 | ▇▁▁▁▁ |
| g20500 | 0 | 1 | 107.26 | 7204.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1148393.77 | ▇▁▁▁▁ |
| e24515 | 0 | 1 | 6736.95 | 228145.06 | 0.00 | 0.00 | 0.00 | 0.00 | 37808225.86 | ▇▁▁▁▁ |
| e24518 | 0 | 1 | 3901.73 | 315661.94 | 0.00 | 0.00 | 0.00 | 0.00 | 66990623.97 | ▇▁▁▁▁ |
| e26270 | 0 | 1 | 111531.36 | 1348132.36 | -66047467.00 | 0.00 | 0.00 | 0.00 | 105293533.02 | ▁▇▁▁▁ |
| e27200 | 0 | 1 | 148.08 | 7621.02 | -917632.01 | 0.00 | 0.00 | 0.00 | 1289027.48 | ▁▁▇▁▁ |
| e32800 | 0 | 1 | 185.39 | 1058.70 | 0.00 | 0.00 | 0.00 | 0.00 | 9569.95 | ▇▁▁▁▁ |
| e58990 | 0 | 1 | 717.03 | 28038.72 | 0.00 | 0.00 | 0.00 | 0.00 | 5606394.60 | ▇▁▁▁▁ |
| e62900 | 0 | 1 | 4193.99 | 87453.50 | 0.00 | 0.00 | 0.00 | 0.00 | 11668956.72 | ▇▁▁▁▁ |
| e87530 | 0 | 1 | 142.13 | 1970.53 | 0.00 | 0.00 | 0.00 | 0.00 | 234782.73 | ▇▁▁▁▁ |
| elderly_dependents | 0 | 1 | 0.00 | 0.01 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
| f2441 | 0 | 1 | 0.08 | 0.34 | 0.00 | 0.00 | 0.00 | 0.00 | 7.00 | ▇▁▁▁▁ |
| f6251 | 0 | 1 | 0.22 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
| a_lineno | 0 | 1 | 1.43 | 0.97 | 1.00 | 1.00 | 1.00 | 1.00 | 16.00 | ▇▁▁▁▁ |
| ffpos | 0 | 1 | 1.11 | 0.38 | 1.00 | 1.00 | 1.00 | 1.00 | 12.00 | ▇▁▁▁▁ |
| fips | 0 | 1 | 27.30 | 16.14 | 1.00 | 12.00 | 27.00 | 41.00 | 56.00 | ▇▅▅▆▆ |
| h_seq | 0 | 1 | 46719.25 | 28216.42 | 2.00 | 20496.00 | 46394.50 | 72194.00 | 94097.00 | ▇▆▇▆▇ |
| data_source | 0 | 1 | 0.93 | 0.26 | 0.00 | 1.00 | 1.00 | 1.00 | 1.00 | ▁▁▁▁▇ |
| k1bx14p | 0 | 1 | -14043.88 | 217937.12 | -30771378.76 | 0.00 | 0.00 | 0.00 | 3794650.59 | ▁▁▁▁▇ |
| k1bx14s | 0 | 1 | -995.15 | 66276.07 | -11738981.22 | 0.00 | 0.00 | 0.00 | 8846568.84 | ▁▁▇▁▁ |
| mcaid_ben | 0 | 1 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | ▁▁▇▁▁ |
| mcare_ben | 0 | 1 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | ▁▁▇▁▁ |
| n24 | 0 | 1 | 0.48 | 0.87 | 0.00 | 0.00 | 0.00 | 1.00 | 9.00 | ▇▂▁▁▁ |
| nu06 | 0 | 1 | 0.17 | 0.48 | 0.00 | 0.00 | 0.00 | 0.00 | 4.00 | ▇▁▁▁▁ |
| nu13 | 0 | 1 | 0.42 | 0.80 | 0.00 | 0.00 | 0.00 | 1.00 | 7.00 | ▇▁▁▁▁ |
| nu18 | 0 | 1 | 0.60 | 0.95 | 0.00 | 0.00 | 0.00 | 1.00 | 9.00 | ▇▂▁▁▁ |
| n1820 | 0 | 1 | 0.09 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 3.00 | ▇▁▁▁▁ |
| n21 | 0 | 1 | 1.45 | 0.60 | 0.00 | 1.00 | 1.00 | 2.00 | 5.00 | ▇▇▁▁▁ |
| other_ben | 0 | 1 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | ▁▁▇▁▁ |
| p08000 | 0 | 1 | 5.11 | 278.13 | 0.00 | 0.00 | 0.00 | 0.00 | 50401.73 | ▇▁▁▁▁ |
| p22250 | 0 | 1 | -8236.55 | 1306722.46 | -412783864.54 | 0.00 | 0.00 | 0.00 | 130246694.17 | ▁▁▁▇▁ |
| p23250 | 0 | 1 | 304409.32 | 3827598.27 | -93066402.13 | 0.00 | 0.00 | 0.00 | 301474332.45 | ▁▇▁▁▁ |
| e87521 | 0 | 1 | 250.76 | 1022.34 | 0.00 | 0.00 | 0.00 | 0.00 | 15949.91 | ▇▁▁▁▁ |
| s006 | 0 | 1 | 824.22 | 918.84 | 0.01 | 112.51 | 565.20 | 1181.32 | 10802.73 | ▇▁▁▁▁ |
| snap_ben | 0 | 1 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | ▁▁▇▁▁ |
| housing_ben | 0 | 1 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | ▁▁▇▁▁ |
| ssi_ben | 0 | 1 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | ▁▁▇▁▁ |
| tanf_ben | 0 | 1 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | ▁▁▇▁▁ |
| vet_ben | 0 | 1 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | ▁▁▇▁▁ |
| wic_ben | 0 | 1 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | ▁▁▇▁▁ |
| PT_SSTB_income | 0 | 1 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | ▁▁▇▁▁ |
| PT_binc_w2_wages | 0 | 1 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | ▁▁▇▁▁ |
| PT_ubia_property | 0 | 1 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | ▁▁▇▁▁ |
# Write the taxdata-only file next to the stacked file; path() is used
# because tddir is an absolute folder outside the project (here::here() is
# for project-relative paths).
tdall |>
  write_csv(path(tddir, "scratch", "tdall.csv"))
# run baseline tax on stacked file ----
# if can't run due to permission error, delete dataframes created from output file and closeAllConnections()
# rm(sdf, sdf2)
# closeAllConnections()

# run tax-calculator on stacked pe-td same variables file ----
# a <- unlink(out1path, force=TRUE, recursive=TRUE)
# NOTE(review): the input and output paths below repeat the tddir folder as
# literals; keep them in sync with tddir if that folder ever moves.
cmd1 <- "C:/Users/donbo/anaconda3/Scripts/tc.exe"
args1 <- c(
  shQuote("E:/data/taxdata-psl/scratch/petdstack.csv"), "2023",
  "--dump",
  "--outdir", "E:/data/taxdata-psl/scratch/"
)
status1 <- system2(cmd1, args1, wait = TRUE)
if (status1 != 0L) {
  # Fail loudly: otherwise later steps would read a stale or missing dump.
  stop("tc.exe failed on petdstack.csv (exit status ", status1, ")",
       call. = FALSE)
}

# run tax-calculator on taxdata all variables file ----
# rm(out2df)
# a <- unlink(out2path, force=TRUE, recursive=TRUE)
cmd2 <- "C:/Users/donbo/anaconda3/Scripts/tc.exe"
args2 <- c(
  shQuote("E:/data/taxdata-psl/scratch/tdall.csv"), "2023",
  "--dump",
  "--outdir", "E:/data/taxdata-psl/scratch/"
)
status2 <- system2(cmd2, args2, wait = TRUE)
if (status2 != 0L) {
  stop("tc.exe failed on tdall.csv (exit status ", status2, ")",
       call. = FALSE)
}

# https://www.sharepointdiary.com/2020/12/powershell-delete-file.html
# #:~:text=To%20force%20delete%20a%20file,and%20delete%20the%20file%20immediately
# Remove-Item -path C:\Temp\example.txt -Force
# system('powershell -file "C:\\directoryName\\coolScript.ps1"')
# system('powershell Remove-Item -path C:\Temp\example.txt -Force')
# get the two output files, save them with stacked record labels ----------------------------------------
# Dump produced from the stacked pe/td input. The src labels are copied
# positionally from petdstack, which assumes the dump keeps the input row
# order -- TODO confirm.
out1fn <- "petdstack-23-#-#-#.csv"
out1path <- path(tddir, "scratch", out1fn)
out1df <- mutate(vroom(out1path), src = petdstack$src)
glimpse(out1df)
count(out1df, src)

# Dump produced from the taxdata all-variables input, labelled the same way
# from tdall.
out2fn <- "tdall-23-#-#-#.csv"
out2path <- path(tddir, "scratch", out2fn)
out2df <- mutate(vroom(out2path), src = tdall$src) # Rows: sum Columns: 209
glimpse(out2df)
count(out2df, src)

# Combine both dumps and store them for later analysis.
stackoutput <- bind_rows(out1df, out2df)
saveRDS(stackoutput, path(tddir, "scratch", "tcoutput.rds"))