2  Preliminaries

Code
source(here::here("r", "libraries.r"))
library(jsonlite)
library(tidyjson)
Code
# --- local locations ---
# folder that holds the taxdata and PolicyEngine files
tddir <- r"(E:\data\taxdata-psl)"
# where the downloaded PolicyEngine flat file (gzipped csv) is stored
pez <- path(tddir, "tax_microdata.csv.gz")
# WinRAR executable, used later to unpack the flat file
wrp <- r"(c:\Program Files\WinRAR\WinRAR.exe)"

# --- remote sources ---
# URL of tax-calculator variables json file
url <- "https://raw.githubusercontent.com/PSLmodels/Tax-Calculator/master/taxcalc/records_variables.json"
# URL of the PolicyEngine flat file release
url_pe_flat <- "https://github.com/PSLmodels/tax-microdata-benchmarking/releases/download/policyengine-cps-flat-file/tax_microdata.csv.gz"
Code
# #| code-fold: true

# Rebuild the table of Tax-Calculator variables from the json file.
# Rerun this whenever the taxdata variables change.

jsdata <- fromJSON(url)
names(jsdata)
jsdata$read # 106
jsdata$calc # 103

# One tibble per json section ("read" and "calc"), stacked; the list names
# become the variable names, each variable's attributes are spread into
# columns, and `form` is expanded to one row per entry (its index column
# form_id is renamed formyears).
allvars <- c("read", "calc") |>
  lapply(\(section) tibble(vtype = section, lst = jsdata[[section]])) |>
  bind_rows() |>
  mutate(vname = names(lst)) |>
  unnest_wider(col = lst) |>
  unnest_longer(col = form) |>
  rename(formyears = form_id) |>
  relocate(vname)

# quick look at the result
glimpse(allvars)
count(allvars, vtype)
count(allvars, type)
count(allvars, availability)

# keep a copy inside the project so later chunks need not hit the network
saveRDS(allvars, file = here::here("data", "tcvars.rds"))
Code
# Refresh the local copy of the pe flatfile; rerun only when the flatfile changes.
# mode = "wb" writes the gz file as binary.
download.file(url_pe_flat, destfile = pez, mode = "wb")
Code
# load the variable table saved in the project's data folder
tcvars <- readRDS(file = here::here("data", "tcvars.rds"))
# ht(tcvars)

# unique (name, type, description) combinations
utcvars <- distinct(tcvars, vname, vtype, desc)


# get frozen tax-calculator file for 2023 ----
tdfn <- "tc23.csv"
tdpath <- path(tddir, tdfn)
tddf <- vroom(file = tdpath) # Rows: 252868 Columns: 209
# glimpse(tddf)
# glimpse(tddf)
Code
# get the file modification date of the csv file using winrar (wrp)
#   because that will not change the date the way other utilities do

# un-winrar the file
#   x = extract with full paths, -ibck = run in the background,
#   -o+ = overwrite existing files; everything ("*.*") goes to tddir
command <- sprintf('"%s" x -ibck -o+ "%s" "*.*" "%s"', wrp, pez, tddir)
# system() returns the exit status; a failed extraction would otherwise go
# unnoticed and the date below would come from a stale or missing file
if (system(command, wait = TRUE) != 0L) {
  warning("WinRAR returned a non-zero status while extracting ", pez,
          "; the file date read below may be stale or missing",
          call. = FALSE)
}

# now that we have the file unzipped, get its date
pefn <- "tax_microdata.csv"
pepath <- path(tddir, pefn)
pedata <- file_info(pepath)
# glimpse(pedata)
pedate <- pedata$modification_time
# we can delete the csv file if we want

## get the pe data directly from the gz file ----
pedf <- vroom(pez) # read directly from the gz file Rows: 155312 Columns: 62
# glimpse(pedf)

This file examines the version of the Policy Engine flat file created on 2024-02-20 09:35:51.

2.1 Variables, if any, that are in the Policy Engine flat file but are not in taxdata:

Code
# Nikhil fixed this
# Names found in the PE flat file but not in taxdata, in alphabetical order.
# The names listed here are presumably the result from an earlier PE file:
#   "e00800p" "e00800s" "e01500p" "e01500s" "e02300p" "e02300s" "e02400p" "e02400s"
sort(setdiff(names(pedf), names(tddf)))
character(0)

Note: Prior versions of the PE flat file included the following variables that were not in taxdata; they appear to have been removed since: e00800p, e00800s, e01500p, e01500s, e02300p, e02300s, e02400p, e02400s.

2.2 Non-calculated variables that are in taxdata but are not in the Policy Engine flat file

Code
# "read" (non-calculated) taxdata variables that have no counterpart in the
# PE flat file, shown alphabetically with their descriptions
utcvars |>
  filter(vtype == "read" & !(vname %in% names(pedf))) |>
  arrange(vname) |>
  select(vname, desc) |>
  gt()
vname desc
MIDR 1 if separately filing spouse itemizes; otherwise 0
PT_SSTB_income Value of one implies business income is from a specified service trade or business (SSTB); value of zero implies business income is from a qualified trade or business
PT_binc_w2_wages Filing unit's share of total W-2 wages paid by the pass-through business
PT_ubia_property Filing unit's share of total business property owned by the pass-through business
a_lineno CPS line number for the person record of the head of the tax filing unit (not used in tax-calculation logic)
agi_bin Historical AGI category used in data extrapolation
cmbtp Estimate of income on (AMT) Form 6251 but not in AGI
data_source 1 if unit is created primarily from IRS-SOI PUF data; 0 if created primarily from CPS data (not used in tax-calculation logic)
e00700 Taxable refunds of state and local income taxes
e01200 Other net gain/loss from Form 4797
e02000 Sch E total rental, royalty, partnership, S-corporation, etc, income/loss (includes e26270 and e27200)
e03220 Educator expenses
e03230 Tuition and fees from Form 8917
e03240 Domestic production activities from Form 8903
e03290 Health savings account deduction from Form 8889
e03300 Contributions to SEP, SIMPLE and qualified plans
e03400 Penalty on early withdrawal of savings
e03500 Alimony paid
e07240 Retirement savings contributions credit from Form 8880
e07260 Residential energy credit from Form 5695
e07300 Foreign tax credit from Form 1116
e07400 General business credit from Form 3800
e07600 Prior year minimum tax credit from Form 8801
e09700 Recapture of Investment Credit
e09800 Unreported payroll taxes from Form 4137 or 8919
e09900 Penalty tax on qualified retirement plans
e11200 Excess payroll (FICA/RRTA) tax withheld
e18500 Itemizable real-estate taxes paid
e19200 Itemizable interest paid
e19800 Itemizable charitable giving: cash/check contributions. WARNING: this variable is already capped in PUF data.
e20100 Itemizable charitable giving: other than cash/check contributions. WARNING: this variable is already capped in PUF data.
e20400 Itemizable miscellaneous deductions. WARNING: this variable is zero below the floor in PUF data.
e24515 Sch D: Un-Recaptured Section 1250 Gain
e24518 Sch D: 28% Rate Gain or Loss
e26270 Sch E: Combined partnership and S-corporation net income/loss (includes k1bx14p and k1bx14s amounts and is included in e02000)
e27200 Sch E: Farm rent net income or loss (included in e02000)
e58990 Investment income elected amount from Form 4952
e62900 Alternative Minimum Tax foreign tax credit from Form 6251
e87521 Total tentative AmOppCredit amount for all students
e87530 Adjusted qualified lifetime learning expenses for all students
f6251 1 if Form 6251 (AMT) attached to return; otherwise 0
ffpos CPS family identifier within household (not used in tax-calculation logic)
g20500 Itemizable gross (before 10% AGI disregard) casualty or theft loss
h_seq CPS household sequence number (not used in tax-calculation logic)
k1bx14p Partner self-employment earnings/loss for taxpayer (included in e26270 total)
k1bx14s Partner self-employment earnings/loss for spouse (included in e26270 total)
mcare_ben Imputed Medicare benefits expressed as the actuarial value of Medicare health insurance
other_ben Non-imputed benefits
p08000 Other tax credits (but not including Sch R credit)
vet_ben Imputed Veteran's benefits

2.3 Summary info on files that will go into Tax-Calculator

Code
# variables the taxdata file and the PE flat file have in common
goodvars <- intersect(names(tddf), names(pedf))

# stack the two files on the shared variables; the list names ("td", "pe")
# become the src label, which is then moved to the last column
petdstack <- bind_rows(
  list(
    td = select(tddf, any_of(goodvars)),
    pe = select(pedf, any_of(goodvars))
  ),
  .id = "src"
) |>
  relocate(src, .after = last_col())

skim(petdstack) # good, no missing values
Data summary
Name petdstack
Number of rows 408180
Number of columns 57
_______________________
Column type frequency:
character 1
numeric 56
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
src 0 1 2 2 0 2 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
DSI 0 1 0.02 0.14 0.00 0.00 0.00 0.00 1.00 ▇▁▁▁▁
EIC 0 1 0.28 0.70 0.00 0.00 0.00 0.00 3.00 ▇▁▁▁▁
FLPDYR 0 1 2023.38 0.49 2023.00 2023.00 2023.00 2024.00 2024.00 ▇▁▁▁▅
MARS 0 1 1.86 1.02 1.00 1.00 2.00 2.00 5.00 ▇▇▁▂▁
RECID 0 1 20852996.64 40660279.83 1.00 101397.75 202680.50 6319501.25 108897800.00 ▇▁▁▁▂
XTOT 0 1 2.02 1.28 0.00 1.00 2.00 3.00 13.00 ▇▃▁▁▁
age_head 0 1 47.54 17.51 0.00 33.00 47.00 61.00 85.00 ▁▇▇▇▃
age_spouse 0 1 21.24 25.90 0.00 0.00 0.00 45.00 96.00 ▇▂▃▂▁
blind_head 0 1 0.02 0.13 0.00 0.00 0.00 0.00 1.00 ▇▁▁▁▁
blind_spouse 0 1 0.00 0.05 0.00 0.00 0.00 0.00 1.00 ▇▁▁▁▁
e00200 0 1 285514.70 1833040.88 0.00 5598.14 45591.53 117855.65 121494256.00 ▇▁▁▁▁
e00200p 0 1 164137.92 1202918.17 0.00 0.00 31479.86 78001.83 105560584.00 ▇▁▁▁▁
e00200s 0 1 121376.79 1119019.36 0.00 0.00 0.00 15817.47 121494256.00 ▇▁▁▁▁
e00300 0 1 19593.95 380849.98 -7974000.00 0.00 0.00 236.06 75919241.73 ▇▁▁▁▁
e00400 0 1 9969.80 112507.90 0.00 0.00 0.00 0.00 15409925.50 ▇▁▁▁▁
e00600 0 1 49718.29 700665.65 0.00 0.00 0.00 189.99 98956000.00 ▇▁▁▁▁
e00650 0 1 38939.43 641371.90 0.00 0.00 0.00 80.00 95590000.00 ▇▁▁▁▁
e00800 0 1 45.15 1849.89 0.00 0.00 0.00 0.00 277528.49 ▇▁▁▁▁
e00900 0 1 13491.49 251457.62 -43485522.52 0.00 0.00 0.00 26459762.31 ▁▁▁▇▁
e00900p 0 1 11730.97 244948.36 -43485522.52 0.00 0.00 0.00 26459762.31 ▁▁▁▇▁
e00900s 0 1 1760.51 48947.77 -7877854.08 0.00 0.00 0.00 10220239.95 ▁▁▇▁▁
e01100 0 1 41.78 2841.44 0.00 0.00 0.00 0.00 934299.43 ▇▁▁▁▁
e01400 0 1 6554.49 97809.10 0.00 0.00 0.00 0.00 10246224.44 ▇▁▁▁▁
e01500 0 1 30633.05 328754.94 0.00 0.00 0.00 0.00 75570690.19 ▇▁▁▁▁
e01700 0 1 6264.72 45952.23 0.00 0.00 0.00 0.00 5627129.48 ▇▁▁▁▁
e02100 0 1 16.52 55521.00 -10218677.07 0.00 0.00 0.00 10730000.00 ▁▁▇▁▁
e02100p 0 1 9.37 54293.24 -10218677.07 0.00 0.00 0.00 10730000.00 ▁▁▇▁▁
e02100s 0 1 7.16 10015.31 -3391138.88 0.00 0.00 0.00 1502646.79 ▁▁▁▇▁
e02300 0 1 103.79 755.35 0.00 0.00 0.00 0.00 50000.00 ▇▁▁▁▁
e02400 0 1 5325.16 12170.05 0.00 0.00 0.00 0.00 146110.40 ▇▁▁▁▁
e03150 0 1 125.68 1301.94 0.00 0.00 0.00 0.00 19139.90 ▇▁▁▁▁
e03210 0 1 51.87 368.97 0.00 0.00 0.00 0.00 3987.48 ▇▁▁▁▁
e03270 0 1 751.21 4039.89 0.00 0.00 0.00 0.00 88881.02 ▇▁▁▁▁
e17500 0 1 2834.25 10849.17 0.00 0.00 0.00 1077.39 1383698.38 ▇▁▁▁▁
e18400 0 1 27982.48 232567.40 -5675.82 0.00 0.00 2056.38 24180068.87 ▇▁▁▁▁
e32800 0 1 171.10 973.35 0.00 0.00 0.00 0.00 9569.95 ▇▁▁▁▁
elderly_dependents 0 1 0.00 0.01 0.00 0.00 0.00 0.00 1.00 ▇▁▁▁▁
f2441 0 1 0.17 0.56 0.00 0.00 0.00 0.00 7.00 ▇▁▁▁▁
fips 0 1 27.33 16.22 1.00 12.00 27.00 41.00 56.00 ▇▅▅▆▆
housing_ben 0 1 66.90 753.21 0.00 0.00 0.00 0.00 31424.64 ▇▁▁▁▁
mcaid_ben 0 1 837.15 4644.91 0.00 0.00 0.00 0.00 144528.75 ▇▁▁▁▁
n1820 0 1 0.08 0.29 0.00 0.00 0.00 0.00 4.00 ▇▁▁▁▁
n21 0 1 1.41 0.59 0.00 1.00 1.00 2.00 6.00 ▇▆▁▁▁
n24 0 1 0.47 0.89 0.00 0.00 0.00 1.00 9.00 ▇▁▁▁▁
nu06 0 1 0.16 0.47 0.00 0.00 0.00 0.00 6.00 ▇▁▁▁▁
nu13 0 1 0.38 0.79 0.00 0.00 0.00 0.00 7.00 ▇▁▁▁▁
nu18 0 1 0.55 0.95 0.00 0.00 0.00 1.00 10.00 ▇▁▁▁▁
p22250 0 1 -5063.82 1028508.44 -412783864.54 0.00 0.00 0.00 130246694.17 ▁▁▁▇▁
p23250 0 1 320028.59 3617295.26 -93066402.13 0.00 0.00 0.00 301474332.45 ▁▇▁▁▁
pencon_p 0 1 2097.61 5359.55 0.00 0.00 0.00 1200.00 680000.00 ▇▁▁▁▁
pencon_s 0 1 1277.70 4357.63 0.00 0.00 0.00 0.00 130000.00 ▇▁▁▁▁
s006 0 1 974.95 1293.27 0.00 15.67 522.83 1400.56 18203.98 ▇▁▁▁▁
snap_ben 0 1 120.25 798.28 0.00 0.00 0.00 0.00 27992.40 ▇▁▁▁▁
ssi_ben 0 1 108.60 1088.74 0.00 0.00 0.00 0.00 39612.00 ▇▁▁▁▁
tanf_ben 0 1 17.68 387.56 0.00 0.00 0.00 0.00 25776.50 ▇▁▁▁▁
wic_ben 0 1 6.63 112.96 0.00 0.00 0.00 0.00 5073.95 ▇▁▁▁▁
Code
# save stacked file ----
# tddir is an absolute path, so the target is built with path(), as for the
# other tddir-based files, rather than the project-relative here::here()
petdstack |>
  write_csv(path(tddir, "scratch", "petdstack.csv"))

# save td file without calc variables ----
# "read" (input) variables that are actually present in the taxdata file
tdkeepvars <- utcvars |>
  filter(vtype == "read",
         vname %in% names(tddf))

# taxdata restricted to those input variables, labelled with its source
tdall <- tddf |>
  select(any_of(tdkeepvars$vname)) |>
  mutate(src = "tdall")
skim(tdall)
Data summary
Name tdall
Number of rows 252868
Number of columns 107
_______________________
Column type frequency:
character 1
numeric 106
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
src 0 1 5 5 0 1 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
DSI 0 1 0.03 0.18 0.00 0.00 0.00 0.00 1.00 ▇▁▁▁▁
EIC 0 1 0.17 0.56 0.00 0.00 0.00 0.00 3.00 ▇▁▁▁▁
FLPDYR 0 1 2023.00 0.00 2023.00 2023.00 2023.00 2023.00 2023.00 ▁▁▇▁▁
MARS 0 1 1.80 0.88 1.00 1.00 2.00 2.00 4.00 ▇▇▁▁▂
MIDR 0 1 0.01 0.08 0.00 0.00 0.00 0.00 1.00 ▇▁▁▁▁
RECID 0 1 126434.50 72996.85 1.00 63217.75 126434.50 189651.25 252868.00 ▇▇▇▇▇
XTOT 0 1 2.11 1.30 0.00 1.00 2.00 3.00 10.00 ▇▃▁▁▁
age_head 0 1 46.09 16.44 1.00 33.00 46.00 59.00 85.00 ▁▇▇▇▂
age_spouse 0 1 23.29 26.29 0.00 0.00 0.00 48.00 96.00 ▇▂▃▂▁
agi_bin 0 1 7.38 4.62 0.00 3.00 8.00 11.00 18.00 ▇▆▆▇▂
blind_head 0 1 0.01 0.11 0.00 0.00 0.00 0.00 1.00 ▇▁▁▁▁
blind_spouse 0 1 0.00 0.05 0.00 0.00 0.00 0.00 1.00 ▇▁▁▁▁
cmbtp 0 1 27175.40 831505.44 -38029107.62 0.00 0.00 0.00 132950663.52 ▁▇▁▁▁
e00200 0 1 241188.14 1153830.96 0.00 201.60 39078.45 118941.16 87662890.23 ▇▁▁▁▁
e00200p 0 1 145270.13 770865.52 0.00 0.00 23105.91 70713.39 43831445.12 ▇▁▁▁▁
e00200s 0 1 95918.01 626005.05 0.00 0.00 0.00 22371.64 43831445.12 ▇▁▁▁▁
pencon_p 0 1 2619.34 5972.99 0.00 0.00 0.00 0.00 25587.08 ▇▁▁▁▁
pencon_s 0 1 1720.10 5238.42 0.00 0.00 0.00 0.00 25587.08 ▇▁▁▁▁
e00300 0 1 23276.97 423509.43 0.00 0.00 0.00 302.30 75919241.73 ▇▁▁▁▁
e00400 0 1 10464.08 114433.60 0.00 0.00 0.00 0.00 15409925.50 ▇▁▁▁▁
e00600 0 1 43898.22 566942.77 0.00 0.00 0.00 436.98 70391623.86 ▇▁▁▁▁
e00650 0 1 33349.35 505240.65 0.00 0.00 0.00 208.99 70391623.86 ▇▁▁▁▁
e00700 0 1 2924.51 35725.19 0.00 0.00 0.00 0.00 4584005.14 ▇▁▁▁▁
e00800 0 1 58.01 2140.94 0.00 0.00 0.00 0.00 277528.49 ▇▁▁▁▁
e00900 0 1 15515.39 270021.29 -43485522.52 0.00 0.00 0.00 26459762.31 ▁▁▁▇▁
e00900p 0 1 13107.81 260937.16 -43485522.52 0.00 0.00 0.00 26459762.31 ▁▁▁▇▁
e00900s 0 1 2407.59 59050.15 -7877854.08 0.00 0.00 0.00 10220239.95 ▁▁▇▁▁
e01100 0 1 67.45 3609.85 0.00 0.00 0.00 0.00 934299.43 ▇▁▁▁▁
e01200 0 1 -6956.64 453889.80 -67056722.27 0.00 0.00 0.00 44120613.22 ▁▁▁▇▁
e01400 0 1 10580.28 124096.28 0.00 0.00 0.00 0.00 10246224.44 ▇▁▁▁▁
e01500 0 1 47782.30 416591.35 0.00 0.00 0.00 0.00 75570690.19 ▇▁▁▁▁
e01700 0 1 8446.89 57047.66 0.00 0.00 0.00 0.00 5627129.48 ▇▁▁▁▁
e02000 0 1 134290.17 1349169.19 -52824630.63 0.00 0.00 0.00 105487980.91 ▁▇▁▁▁
e02100 0 1 -414.22 56357.39 -10218677.07 0.00 0.00 0.00 3585758.79 ▁▁▁▇▁
e02100p 0 1 -407.58 54439.54 -10218677.07 0.00 0.00 0.00 3585758.79 ▁▁▁▇▁
e02100s 0 1 -6.65 12614.12 -3391138.88 0.00 0.00 0.00 1502646.79 ▁▁▁▇▁
e02300 0 1 116.95 589.74 0.00 0.00 0.00 0.00 22930.01 ▇▁▁▁▁
e02400 0 1 5120.01 12364.86 0.00 0.00 0.00 0.00 146110.40 ▇▁▁▁▁
e03150 0 1 202.87 1649.39 0.00 0.00 0.00 0.00 19139.90 ▇▁▁▁▁
e03210 0 1 83.73 465.93 0.00 0.00 0.00 0.00 3987.48 ▇▁▁▁▁
e03220 0 1 8.97 62.36 0.00 0.00 0.00 0.00 797.50 ▇▁▁▁▁
e03230 0 1 38.48 442.84 0.00 0.00 0.00 0.00 6379.97 ▇▁▁▁▁
e03240 0 1 2961.42 50187.76 0.00 0.00 0.00 0.00 7617678.69 ▇▁▁▁▁
e03270 0 1 1212.60 5077.93 0.00 0.00 0.00 0.00 88881.02 ▇▁▁▁▁
e03290 0 1 119.61 959.61 0.00 0.00 0.00 0.00 12402.00 ▇▁▁▁▁
e03300 0 1 2351.33 18756.94 0.00 0.00 0.00 0.00 754271.41 ▇▁▁▁▁
e03400 0 1 12.20 1917.03 0.00 0.00 0.00 0.00 861933.33 ▇▁▁▁▁
e03500 0 1 49.36 1492.34 0.00 0.00 0.00 0.00 166995.59 ▇▁▁▁▁
e07240 0 1 9.26 71.48 0.00 0.00 0.00 0.00 3189.98 ▇▁▁▁▁
e07260 0 1 61.29 2170.94 0.00 0.00 0.00 0.00 447395.07 ▇▁▁▁▁
e07300 0 1 3639.95 75644.14 0.00 0.00 0.00 0.00 9982719.22 ▇▁▁▁▁
e07400 0 1 645.60 19296.17 0.00 0.00 0.00 0.00 4253977.67 ▇▁▁▁▁
e07600 0 1 117.70 4143.98 0.00 0.00 0.00 0.00 822537.04 ▇▁▁▁▁
e09700 0 1 1.90 331.33 0.00 0.00 0.00 0.00 133500.78 ▇▁▁▁▁
e09800 0 1 0.14 9.34 0.00 0.00 0.00 0.00 1260.04 ▇▁▁▁▁
e09900 0 1 96.17 1919.56 0.00 0.00 0.00 0.00 542137.56 ▇▁▁▁▁
e11200 0 1 102.84 864.85 0.00 0.00 0.00 0.00 45935.75 ▇▁▁▁▁
e17500 0 1 1270.37 10623.11 0.00 0.00 0.00 0.00 1065056.49 ▇▁▁▁▁
e18400 0 1 37128.87 266267.84 0.00 0.00 0.00 6029.07 24180068.87 ▇▁▁▁▁
e18500 0 1 7398.76 22441.88 0.00 0.00 0.00 5726.02 922543.00 ▇▁▁▁▁
e19200 0 1 8356.57 47032.81 0.00 0.00 0.00 6626.29 5758134.85 ▇▁▁▁▁
e19800 0 1 15275.12 226380.38 0.00 0.00 0.00 1882.09 48009239.65 ▇▁▁▁▁
e20100 0 1 6759.27 240670.30 0.00 0.00 0.00 0.00 47179844.15 ▇▁▁▁▁
e20400 0 1 9230.92 97205.48 0.00 0.00 0.00 159.50 17305656.15 ▇▁▁▁▁
g20500 0 1 107.26 7204.31 0.00 0.00 0.00 0.00 1148393.77 ▇▁▁▁▁
e24515 0 1 6736.95 228145.06 0.00 0.00 0.00 0.00 37808225.86 ▇▁▁▁▁
e24518 0 1 3901.73 315661.94 0.00 0.00 0.00 0.00 66990623.97 ▇▁▁▁▁
e26270 0 1 111531.36 1348132.36 -66047467.00 0.00 0.00 0.00 105293533.02 ▁▇▁▁▁
e27200 0 1 148.08 7621.02 -917632.01 0.00 0.00 0.00 1289027.48 ▁▁▇▁▁
e32800 0 1 185.39 1058.70 0.00 0.00 0.00 0.00 9569.95 ▇▁▁▁▁
e58990 0 1 717.03 28038.72 0.00 0.00 0.00 0.00 5606394.60 ▇▁▁▁▁
e62900 0 1 4193.99 87453.50 0.00 0.00 0.00 0.00 11668956.72 ▇▁▁▁▁
e87530 0 1 142.13 1970.53 0.00 0.00 0.00 0.00 234782.73 ▇▁▁▁▁
elderly_dependents 0 1 0.00 0.01 0.00 0.00 0.00 0.00 1.00 ▇▁▁▁▁
f2441 0 1 0.08 0.34 0.00 0.00 0.00 0.00 7.00 ▇▁▁▁▁
f6251 0 1 0.22 0.41 0.00 0.00 0.00 0.00 1.00 ▇▁▁▁▂
a_lineno 0 1 1.43 0.97 1.00 1.00 1.00 1.00 16.00 ▇▁▁▁▁
ffpos 0 1 1.11 0.38 1.00 1.00 1.00 1.00 12.00 ▇▁▁▁▁
fips 0 1 27.30 16.14 1.00 12.00 27.00 41.00 56.00 ▇▅▅▆▆
h_seq 0 1 46719.25 28216.42 2.00 20496.00 46394.50 72194.00 94097.00 ▇▆▇▆▇
data_source 0 1 0.93 0.26 0.00 1.00 1.00 1.00 1.00 ▁▁▁▁▇
k1bx14p 0 1 -14043.88 217937.12 -30771378.76 0.00 0.00 0.00 3794650.59 ▁▁▁▁▇
k1bx14s 0 1 -995.15 66276.07 -11738981.22 0.00 0.00 0.00 8846568.84 ▁▁▇▁▁
mcaid_ben 0 1 0.00 0.00 0.00 0.00 0.00 0.00 0.00 ▁▁▇▁▁
mcare_ben 0 1 0.00 0.00 0.00 0.00 0.00 0.00 0.00 ▁▁▇▁▁
n24 0 1 0.48 0.87 0.00 0.00 0.00 1.00 9.00 ▇▂▁▁▁
nu06 0 1 0.17 0.48 0.00 0.00 0.00 0.00 4.00 ▇▁▁▁▁
nu13 0 1 0.42 0.80 0.00 0.00 0.00 1.00 7.00 ▇▁▁▁▁
nu18 0 1 0.60 0.95 0.00 0.00 0.00 1.00 9.00 ▇▂▁▁▁
n1820 0 1 0.09 0.30 0.00 0.00 0.00 0.00 3.00 ▇▁▁▁▁
n21 0 1 1.45 0.60 0.00 1.00 1.00 2.00 5.00 ▇▇▁▁▁
other_ben 0 1 0.00 0.00 0.00 0.00 0.00 0.00 0.00 ▁▁▇▁▁
p08000 0 1 5.11 278.13 0.00 0.00 0.00 0.00 50401.73 ▇▁▁▁▁
p22250 0 1 -8236.55 1306722.46 -412783864.54 0.00 0.00 0.00 130246694.17 ▁▁▁▇▁
p23250 0 1 304409.32 3827598.27 -93066402.13 0.00 0.00 0.00 301474332.45 ▁▇▁▁▁
e87521 0 1 250.76 1022.34 0.00 0.00 0.00 0.00 15949.91 ▇▁▁▁▁
s006 0 1 824.22 918.84 0.01 112.51 565.20 1181.32 10802.73 ▇▁▁▁▁
snap_ben 0 1 0.00 0.00 0.00 0.00 0.00 0.00 0.00 ▁▁▇▁▁
housing_ben 0 1 0.00 0.00 0.00 0.00 0.00 0.00 0.00 ▁▁▇▁▁
ssi_ben 0 1 0.00 0.00 0.00 0.00 0.00 0.00 0.00 ▁▁▇▁▁
tanf_ben 0 1 0.00 0.00 0.00 0.00 0.00 0.00 0.00 ▁▁▇▁▁
vet_ben 0 1 0.00 0.00 0.00 0.00 0.00 0.00 0.00 ▁▁▇▁▁
wic_ben 0 1 0.00 0.00 0.00 0.00 0.00 0.00 0.00 ▁▁▇▁▁
PT_SSTB_income 0 1 0.00 0.00 0.00 0.00 0.00 0.00 0.00 ▁▁▇▁▁
PT_binc_w2_wages 0 1 0.00 0.00 0.00 0.00 0.00 0.00 0.00 ▁▁▇▁▁
PT_ubia_property 0 1 0.00 0.00 0.00 0.00 0.00 0.00 0.00 ▁▁▇▁▁
Code
# write the taxdata input-variables file to the scratch folder under tddir;
# tddir is an absolute path, so use path() rather than the project-relative
# here::here()
tdall |>
  write_csv(path(tddir, "scratch", "tdall.csv"))
Code
# run baseline tax on stacked file ----
# if can't run due to permission error, delete dataframes created from output file and closeAllConnections()
# rm(sdf, sdf2)
# closeAllConnections()

# Input and output locations are derived from tddir so they stay in step with
# the files written to, and read back from, the scratch folder. Paths are
# quoted in case tddir ever contains spaces. system2() returns the exit
# status, so a failed run is reported instead of passing silently.

# run tax-calculator on stacked pe-td same variables file ----
# a <- unlink(out1path, force=TRUE, recursive=TRUE)
cmd1 <- "C:/Users/donbo/anaconda3/Scripts/tc.exe"
args1 <- c(shQuote(as.character(path(tddir, "scratch", "petdstack.csv"))),
           "2023",
           "--dump",
           "--outdir", shQuote(paste0(path(tddir, "scratch"), "/")))
if (system2(cmd1, args1, wait = TRUE) != 0L) {
  warning("tax-calculator returned a non-zero status for petdstack.csv",
          call. = FALSE)
}


# run tax-calculator on taxdata all variables file ----
# rm(out2df)
# a <- unlink(out2path, force=TRUE, recursive=TRUE)
cmd2 <- "C:/Users/donbo/anaconda3/Scripts/tc.exe"
args2 <- c(shQuote(as.character(path(tddir, "scratch", "tdall.csv"))),
           "2023",
           "--dump",
           "--outdir", shQuote(paste0(path(tddir, "scratch"), "/")))
if (system2(cmd2, args2, wait = TRUE) != 0L) {
  warning("tax-calculator returned a non-zero status for tdall.csv",
          call. = FALSE)
}

# notes on force-deleting a locked file from powershell:
# https://www.sharepointdiary.com/2020/12/powershell-delete-file.html
# #:~:text=To%20force%20delete%20a%20file,and%20delete%20the%20file%20immediately
# Remove-Item -path C:\Temp\example.txt -Force
# system('powershell -file "C:\\directoryName\\coolScript.ps1"')
# system('powershell Remove-Item -path C:\Temp\example.txt -Force')
Code
# get the two output files, save them with stacked record labels ----------------------------------------
# Labels are attached by row position, taken from the data frames that were
# written out as the inputs.
# NOTE(review): assumes the output files keep the input row order — confirm.

# output for the stacked pe-td file
out1fn <- "petdstack-23-#-#-#.csv"
out1path <- path(tddir, "scratch", out1fn)
out1df <- mutate(vroom(out1path), src = petdstack$src)
glimpse(out1df)
count(out1df, src)

# output for the taxdata all-variables file
out2fn <- "tdall-23-#-#-#.csv"
out2path <- path(tddir, "scratch", out2fn)
out2df <- mutate(vroom(out2path), src = tdall$src) # Rows: sum Columns: 209
glimpse(out2df)
count(out2df, src)

# one combined file holding both sets of results
stackoutput <- bind_rows(list(out1df, out2df))

saveRDS(stackoutput, file = path(tddir, "scratch", "tcoutput.rds"))