using CSV
using DataFrames
using StringEncodings
using Plots
using LinearAlgebra
using Statistics
using StatsBase
using Printf
using ForwardDiff
using Random
using NLopt
using StatsPlots
using CategoricalArrays
using Serialization6 応用編
for file in readdir("functions/entry_exit/")
include("functions/entry_exit/" * file)
end6.1 データの読み込み
data_raw = data = CSV.File(
open(
read,
"data/entry_exit_application/data_hospital_chapter6.csv",
enc"UTF-8"
),
missingstring = ["NA", ""],
) |> DataFrame
first(data, 5)5×20 DataFrame
| Row | HospitalID | Prefecture | CityCode | SecondaryAreaName | SecondaryAreaCode | Management | Kyukyu | Kinou | Sien | Hyoka | DepNeurology | DepNeurosurgery | NumBeds | MRINumOwn | Tesla | Population | Menseki | PopDensity | TaxableIncome | MRIOwnDum |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Int64 | String15 | Int64 | String31 | Int64? | String? | Int64? | Int64? | Int64? | Int64? | Int64? | Int64? | Int64? | Int64 | Int64? | Int64? | Float64? | Float64? | Int64? | Int64 | |
| 1 | 1 | 北海道 | 1101 | 札幌 | 104 | 共済 | 1 | 0 | 0 | 0 | 0 | 0 | 243 | 1 | 2 | 220189 | 46.42 | 4743.4 | 3017 | 1 |
| 2 | 2 | 北海道 | 1101 | 札幌 | 104 | 財団 | 0 | 0 | 0 | 0 | 0 | 0 | 180 | 1 | 0 | 220189 | 46.42 | 4743.4 | 3017 | 1 |
| 3 | 3 | 北海道 | 1101 | 札幌 | 104 | 医療法人 | 1 | 0 | 0 | 0 | 0 | 0 | 110 | 1 | 1 | 220189 | 46.42 | 4743.4 | 3017 | 1 |
| 4 | 4 | 北海道 | 1101 | 札幌 | 104 | 医療法人 | 1 | 0 | 0 | 0 | 1 | 1 | 134 | 3 | 2 | 220189 | 46.42 | 4743.4 | 3017 | 1 |
| 5 | 5 | 北海道 | 1101 | 札幌 | 104 | 医療法人 | 0 | 0 | 0 | 0 | 0 | 0 | 250 | 1 | 2 | 220189 | 46.42 | 4743.4 | 3017 | 1 |
data_cleaned = data_raw[:, :];
replace!(data_cleaned.Management, missing => "");
data_cleaned[!, :DaigakuDum] = in(["公立大学法人", "国(国立大学法人)", "私立学校法人"]).(data_cleaned.Management);
data_cleaned[!, :ZeroBedDum] = (data_cleaned[:, :NumBeds] .== 0);
data_cleaned[!, :NumBeds] = data_cleaned[:, :NumBeds] ./ 100.0;
data_cleaned[!, :LogNumBeds] = log.(data_cleaned[:, :NumBeds] .+ 0.01);
data_cleaned[!, :Population] = data_cleaned[:, :Population] ./ 1e+6;
data_cleaned[!, :Menseki] = data_cleaned[:, :Menseki] ./ 100.0;
data_cleaned[!, :TaxableIncome] = data_cleaned[:, :TaxableIncome] ./ 1000.0;
data_cleaned[!, :LogPop] = log.(data_cleaned[:, :Population]);
data_cleaned[!, :LogIncome] = log.(data_cleaned[:, :TaxableIncome]);6.2 企業レベル変数の記述統計
listVars = [
"Kyukyu", "Kinou", "Sien", "Hyoka", "DepNeurology",
"DepNeurosurgery", "NumBeds", "ZeroBedDum", "DaigakuDum"
]
table_mean = combine(
groupby(data_cleaned, :MRIOwnDum),
[Symbol(variable) for variable in listVars] .=>
x -> mean(skipmissing(x))
);
table_sd = combine(
groupby(data_cleaned, :MRIOwnDum),
[Symbol(variable) for variable in listVars] .=>
x -> std(skipmissing(x))
);
DataFrame(
variable = listVars,
meanOwnMRI = Matrix(table_mean)[2, 2:end],
sdOwnMRI = Matrix(table_sd)[2, 2:end],
meanNotOwnMRI_ = Matrix(table_mean)[1, 2:end],
sdNotOwnMRI = Matrix(table_sd)[1, 2:end]
)9×5 DataFrame
| Row | variable | meanOwnMRI | sdOwnMRI | meanNotOwnMRI_ | sdNotOwnMRI |
|---|---|---|---|---|---|
| String | Float64 | Float64 | Float64 | Float64 | |
| 1 | Kyukyu | 0.711693 | 0.453042 | 0.302131 | 0.459223 |
| 2 | Kinou | 0.0199464 | 0.139837 | 0.00254963 | 0.050434 |
| 3 | Sien | 0.0476332 | 0.213021 | 0.00400656 | 0.0631762 |
| 4 | Hyoka | 0.433462 | 0.495627 | 0.169368 | 0.375111 |
| 5 | DepNeurology | 0.393081 | 0.488507 | 0.121991 | 0.327306 |
| 6 | DepNeurosurgery | 0.569643 | 0.4952 | 0.0821194 | 0.274572 |
| 7 | NumBeds | 2.07728 | 1.9415 | 0.391861 | 0.710501 |
| 8 | ZeroBedDum | 0.058316 | 0.234375 | 0.452934 | 0.497825 |
| 9 | DaigakuDum | 0.0416048 | 0.199714 | 0.00454794 | 0.067291 |
vcat(
calculateShareOwnMRI(data_cleaned),
calculateShareOwnMRI(
dropmissing(data_cleaned, :Kyukyu)[
dropmissing(data_cleaned, :Kyukyu).Kyukyu .== 1,
:]
),
calculateShareOwnMRI(
dropmissing(data_cleaned, :Sien)[
dropmissing(data_cleaned, :Sien).Sien .== 1,
:]
),
calculateShareOwnMRI(
dropmissing(data_cleaned, :Hyoka)[
dropmissing(data_cleaned, :Hyoka).Hyoka .== 1,
:]
),
calculateShareOwnMRI(
dropmissing(data_cleaned, :DepNeurology)[
dropmissing(data_cleaned, :DepNeurology).DepNeurology .== 1,
:]
),
calculateShareOwnMRI(
dropmissing(data_cleaned, :DepNeurosurgery)[
dropmissing(data_cleaned, :DepNeurosurgery).DepNeurosurgery .== 1,
:]
),
calculateShareOwnMRI(
dropmissing(data_cleaned, :LogNumBeds)[
dropmissing(data_cleaned, :LogNumBeds).LogNumBeds .>= log(1.2),
:]
),
calculateShareOwnMRI(
dropmissing(data_cleaned, :DaigakuDum)[
dropmissing(data_cleaned, :DaigakuDum).DaigakuDum .== 1,
:]
),
calculateShareOwnMRI(
dropmissing(data_cleaned, :ZeroBedDum)[
dropmissing(data_cleaned, :ZeroBedDum).ZeroBedDum .== 1,
:]
),
)9×3 Matrix{Float64}:
8862.0 3365.0 37.97
4051.0 2392.0 59.05
182.0 160.0 87.91
2386.0 1456.0 61.02
1987.0 1318.0 66.33
2365.0 1914.0 80.93
2264.0 1932.0 85.34
165.0 140.0 84.85
2674.0 196.0 7.33
6.3 Berry (1992) による推定
サポートサイトの推定方法と異なり、ここでは「各市での参入病院数」のみを用い、「どの病院が参入したか」の情報は用いない。 Berry (1992)で述べられているように、パラメタの推定そのものにはどの病院が参入したかの情報は必要ないためである。 ただし、後の推定結果の評価や反実仮想分析では、「病床数が多い病院から参入するかどうかを決定する」という仮定のもと、どの病院が参入するかを予測する。
data_processed_pre = dropmissing(data_cleaned[:, [
:CityCode, :Kyukyu, :Kinou, :Sien, :Hyoka,
:DepNeurology, :DepNeurosurgery, :LogNumBeds,
:ZeroBedDum, :DaigakuDum,
:Menseki, :LogPop, :LogIncome, :MRIOwnDum
]]);
first(data_processed_pre, 5)5×14 DataFrame
| Row | CityCode | Kyukyu | Kinou | Sien | Hyoka | DepNeurology | DepNeurosurgery | LogNumBeds | ZeroBedDum | DaigakuDum | Menseki | LogPop | LogIncome | MRIOwnDum |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Int64 | Int64 | Int64 | Int64 | Int64 | Int64 | Int64 | Float64 | Bool | Bool | Float64 | Float64 | Float64 | Int64 | |
| 1 | 1101 | 1 | 0 | 0 | 0 | 0 | 0 | 0.891998 | false | false | 0.4642 | -1.51327 | 1.10426 | 1 |
| 2 | 1101 | 0 | 0 | 0 | 0 | 0 | 0 | 0.593327 | false | false | 0.4642 | -1.51327 | 1.10426 | 1 |
| 3 | 1101 | 1 | 0 | 0 | 0 | 0 | 0 | 0.10436 | false | false | 0.4642 | -1.51327 | 1.10426 | 1 |
| 4 | 1101 | 1 | 0 | 0 | 0 | 1 | 1 | 0.300105 | false | false | 0.4642 | -1.51327 | 1.10426 | 1 |
| 5 | 1101 | 0 | 0 | 0 | 0 | 0 | 0 | 0.920283 | false | false | 0.4642 | -1.51327 | 1.10426 | 1 |
Random.seed!(123)
data_processed = processDataForBerryEst(data_processed_pre);
first(data_processed, 5)5×18 DataFrame
| Row | CityCode | Kyukyu | Kinou | Sien | Hyoka | DepNeurology | DepNeurosurgery | LogNumBeds | ZeroBedDum | DaigakuDum | Menseki | LogPop | LogIncome | MRIOwnDum | TieEntryOrder | numPotenHos | numEntryObs | EntryOrderId |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Int64 | Int64 | Int64 | Int64 | Int64 | Int64 | Int64 | Float64 | Bool | Bool | Float64 | Float64 | Float64 | Int64 | Float64 | Int64 | Int64 | Int64 | |
| 1 | 1101 | 1 | 0 | 0 | 0 | 1 | 1 | 2.19165 | false | true | 0.4642 | -1.51327 | 1.10426 | 1 | 0.692209 | 41 | 17 | 1 |
| 2 | 1101 | 1 | 0 | 0 | 0 | 1 | 1 | 2.0931 | false | false | 0.4642 | -1.51327 | 1.10426 | 1 | 0.0320967 | 41 | 17 | 2 |
| 3 | 1101 | 0 | 0 | 0 | 0 | 1 | 0 | 1.61939 | false | false | 0.4642 | -1.51327 | 1.10426 | 1 | 0.136551 | 41 | 17 | 3 |
| 4 | 1101 | 0 | 0 | 0 | 0 | 1 | 0 | 1.59939 | false | false | 0.4642 | -1.51327 | 1.10426 | 1 | 0.334152 | 41 | 17 | 4 |
| 5 | 1101 | 0 | 0 | 0 | 0 | 0 | 0 | 1.14103 | false | false | 0.4642 | -1.51327 | 1.10426 | 1 | 0.427328 | 41 | 17 | 5 |
numPotenHos_max = 4;
data_processed = data_processed[
data_processed.EntryOrderId .<= numPotenHos_max,
:];
transform!(
groupby(data_processed, :CityCode),
:MRIOwnDum => sum => :numEntryObs,
nrow => :numPotenHos
);
data_processed[!, :Const] .= 1;
vcat(
calculateShareOwnMRI(data_processed),
calculateShareOwnMRI(
dropmissing(data_processed, :Kyukyu)[
dropmissing(data_processed, :Kyukyu).Kyukyu .== 1,
:]
),
calculateShareOwnMRI(
dropmissing(data_processed, :Sien)[
dropmissing(data_processed, :Sien).Sien .== 1,
:]
),
calculateShareOwnMRI(
dropmissing(data_processed, :Hyoka)[
dropmissing(data_processed, :Hyoka).Hyoka .== 1,
:]
),
calculateShareOwnMRI(
dropmissing(data_processed, :DepNeurology)[
dropmissing(data_processed, :DepNeurology).DepNeurology .== 1,
:]
),
calculateShareOwnMRI(
dropmissing(data_processed, :DepNeurosurgery)[
dropmissing(data_processed, :DepNeurosurgery).DepNeurosurgery .== 1,
:]
),
calculateShareOwnMRI(
dropmissing(data_processed, :LogNumBeds)[
dropmissing(data_processed, :LogNumBeds).LogNumBeds .>= log(1.2),
:]
),
calculateShareOwnMRI(
dropmissing(data_processed, :DaigakuDum)[
dropmissing(data_processed, :DaigakuDum).DaigakuDum .== 1,
:]
),
calculateShareOwnMRI(
dropmissing(data_processed, :ZeroBedDum)[
dropmissing(data_processed, :ZeroBedDum).ZeroBedDum .== 1,
:]
),
)9×3 Matrix{Float64}:
4112.0 2246.0 54.62
2476.0 1745.0 70.48
151.0 141.0 93.38
1422.0 1097.0 77.14
1276.0 1014.0 79.47
1697.0 1472.0 86.74
1857.0 1628.0 87.67
125.0 115.0 92.0
646.0 41.0 6.35
numSim = 100;
uniqueCityCode = unique(data_processed.CityCode);
numCity = length(uniqueCityCode);
numHos = nrow(data_processed);
numEntryObs = combine(
groupby(data_processed, :CityCode),
:MRIOwnDum => sum => :numEntryObs
).numEntryObs;
cityIndex = data_processed.CityCode .== uniqueCityCode';u_m0 = cityIndex * randn(numCity, numSim);
u_mIm = randn(numHos, numSim);param_init = [
-0.612340533, -5.525423772, -0.505275676,
-0.32531026, -1.04162392, -0.991878025,
-3.87040966, -1.272714254, 2.684741676,
0.040555764, 0.426448612, -1.399627382,
0.990975782, 0.958075433
];function obj_for_Optim(x::Vector, grad::Vector)
if length(grad) != 0
ForwardDiff.gradient!(
grad,
x -> calculateBerryObjectiveAtCityLevel(
x,
data_processed,
u_m0,
u_mIm,
numEntryObs
),
x
)
end
return calculateBerryObjectiveAtCityLevel(
x,
data_processed,
u_m0,
u_mIm,
numEntryObs
)
end
opt = NLopt.Opt(:LN_NELDERMEAD, length(param_init))
opt.lower_bounds = [repeat([-Inf], 13); -1.0]
opt.upper_bounds = [repeat([Inf], 13); 1.0]
opt.min_objective = obj_for_Optim;
@time (minf, minx, ret) = NLopt.optimize(opt, param_init)
@printf(
"Minimized objective value: %.3f \n",
minf
)
[param_init minx] 15.410078 seconds (36.69 M allocations: 29.563 GiB, 11.35% gc time, 9.89% compilation time)
Minimized objective value: 0.374
14×2 Matrix{Float64}:
-0.612341 -0.655829
-5.52542 -8.41047
-0.505276 -0.491684
-0.32531 -0.44966
-1.04162 -1.25152
-0.991878 -1.0654
-3.87041 -3.52228
-1.27271 -1.11907
2.68474 2.58767
0.0405558 0.033967
0.426449 0.394342
-1.39963 -1.44356
0.990976 1.08858
0.958075 0.963786
numBootstrap = 100;
bootEstMat = zeros(length(minx), numBootstrap)
@time for bootIndex in 1:numBootstrap
bootCitySample = sample(uniqueCityCode, numCity, replace = true);
df_boot = reduce(
vcat,
[
transform(
data_processed[
data_processed.CityCode .== bootCitySample[city],
:],
:CityCode => (x -> city) => :CityCode
)
for city in 1:numCity
]
);
numHos_boot = nrow(df_boot);
numEntryObs_boot = combine(
groupby(df_boot, :CityCode),
:MRIOwnDum => sum => :numEntryObs
).numEntryObs;
uniqueCityCode_boot = unique(df_boot.CityCode);
cityIndex_boot = df_boot.CityCode .== uniqueCityCode_boot';
u_m0_boot = cityIndex_boot * randn(numCity, numSim);
u_mIm_boot = randn(numHos_boot, numSim);
function obj_for_Optim(x::Vector, grad::Vector)
if length(grad) != 0
ForwardDiff.gradient!(
grad,
x -> calculateBerryObjectiveAtCityLevel(
x,
df_boot,
u_m0_boot,
u_mIm_boot,
numEntryObs_boot
),
x
)
end
return calculateBerryObjectiveAtCityLevel(
x,
df_boot,
u_m0_boot,
u_mIm_boot,
numEntryObs_boot
)
end
opt_boot = NLopt.Opt(:LN_NELDERMEAD, length(param_init))
opt_boot.lower_bounds = [repeat([-Inf], 13); -1.0]
opt_boot.upper_bounds = [repeat([Inf], 13); 1.0]
opt_boot.min_objective = obj_for_Optim;
(minf_boot, minx_boot, ret_boot) = NLopt.optimize(opt_boot, param_init)
bootEstMat[:, bootIndex] .= minx_boot;
end6.4 推定結果
[minx std(bootEstMat, dims = 2)]14×2 Matrix{Float64}:
-0.655829 0.103045
-8.41047 3.51758
-0.491684 0.158229
-0.44966 0.203663
-1.25152 0.20427
-1.0654 0.104526
-3.52228 0.473332
-1.11907 1.23384
2.58767 0.0825597
0.033967 0.0142215
0.394342 0.0451492
-1.44356 0.189356
1.08858 0.132724
0.963786 0.0157399
6.4.1 モデルによる予測の確認
berry_est = minx;
alpha_est = berry_est[1:8];
beta_est = berry_est[9:12];
delta_est = berry_est[13];
rho_est = berry_est[14];
profitExcludeCompetition = calculateProfitExcludeCompetition(
berry_est,
data_processed,
u_m0,
u_mIm
);
each_entry_mat = calculateEquilibriumNumEntry(
data_processed,
delta_est,
profitExcludeCompetition
);
entryProb = mean(simulateEntryByOrder(
delta_est,
cityIndex,
profitExcludeCompetition,
each_entry_mat
), dims = 2)[:, 1];entryPred = entryProb .> 0.5;
data_predicted = copy(data_processed);
data_predicted[!, :entryProb] = entryProb;
data_predicted[!, :entryPred] = entryPred;
data_predicted_agg = combine(
groupby(data_predicted, :CityCode),
:MRIOwnDum => sum => :Actual,
:entryPred => sum => :Predict
);
data_predicted_agg = stack(data_predicted_agg, [:Actual, :Predict]);
data_predicted_sum = combine(groupby(data_predicted_agg, [:variable, :value]), nrow);
sort!(data_predicted_sum, [:variable, :value]);
groupedbar(
data_predicted_sum.nrow,
group = data_predicted_sum.variable,
xlabel = "Number of hospitals in a city",
ylabel = "Number of cities",
bar_width = 0.67,
xticks = (1:5, 0:4),
lw = 0
)data_predicted[!, :Top25pct_NumBeds] = (
data_processed.LogNumBeds .>= log(1.2)
);
data_predicted[!, :NotNeuro] = (
(data_processed.DepNeurology .!= 1) .&
(data_processed.DepNeurosurgery .!= 1)
);
listVarsShort = [
"Kyukyu", "Sien", "Hyoka", "DepNeurology",
"DepNeurosurgery", "Top25pct_NumBeds",
"ZeroBedDum", "DaigakuDum", "NotNeuro"
]
tableActPred = vcat([
sum.(
eachcol(data_predicted[
data_predicted[:, variable] .== 1,
[:MRIOwnDum, :entryPred]
])
)'
for variable in listVarsShort
]...);
dfActPred = vcat(
DataFrame(
Actual = sum(data_predicted.MRIOwnDum),
Predict = sum(data_predicted.entryPred)
),
DataFrame(
Actual = tableActPred[:, 1],
Predict = tableActPred[:, 2]
)
);
dfActPredStack = stack(dfActPred, [:Actual, :Predict]);
dfActPredStack[:, :category] = repeat(["All"; listVarsShort], 2);
groupedbar(
dfActPredStack.category,
dfActPredStack.value,
group = dfActPredStack.variable,
xlabel = "Number of hospitals in a city",
ylabel = "Number of cities",
bar_width = 0.67,
lw = 0,
xrotation = 30
)6.5 反実仮想分析
data_cf = copy(data_processed);
sort!(
data_cf, [
:CityCode, :DepNeurology, :DepNeurosurgery,
:LogNumBeds, :TieEntryOrder
],
rev = [false, true, true, true, true]
);
transform!(
groupby(data_cf, :CityCode),
:numPotenHos => (x -> 1:length(x)) => :EntryOrderId
);
profitExcludeCompetition_cf = calculateProfitExcludeCompetition(
berry_est,
data_cf,
u_m0,
u_mIm
);
profitExcludeCompetition_cf[
(data_cf.DepNeurology .== 1) .| (data_cf.DepNeurosurgery .== 1),
:] .= 1e+5;each_entry_mat_cf = calculateEquilibriumNumEntry(
data_cf,
delta_est,
profitExcludeCompetition_cf
);
entryProb_cf = mean(simulateEntryByOrder(
delta_est,
cityIndex,
profitExcludeCompetition_cf,
each_entry_mat_cf
), dims = 2)[:, 1];6.5.1 すべての病院
ここでは、各市でいくつの病院がMRIを導入するかのシミュレーション結果を示す。 神経内科・外科を持つ病院がMRIの所有を義務付けられているという反実仮想のもとでは、多くの市でMRIを所有する病院の数が増えている。
entryPred_cf = entryProb_cf .> 0.5;
data_predicted_cf = copy(data_cf);
data_predicted_cf[!, :entryProb] = entryProb_cf;
data_predicted_cf[!, :entryPred] = entryPred_cf;
data_predicted_cf_agg = combine(
groupby(data_predicted_cf, :CityCode),
:MRIOwnDum => sum => :Actual,
:entryPred => sum => :CounterFactual
);
data_predicted_cf_agg = stack(data_predicted_cf_agg, [:Actual, :CounterFactual]);
data_predicted_cf_sum = combine(groupby(
vcat(
data_predicted_agg,
data_predicted_cf_agg[
data_predicted_cf_agg.variable .== "CounterFactual",
:]
),
[:variable, :value]
), nrow);
transform!(
data_predicted_cf_sum,
:variable =>
(
x -> categorical(
x; ordered = true,
levels = ["Actual", "Predict", "CounterFactual"]
)
) =>
:variable
)
sort!(data_predicted_cf_sum, [:variable, :value]);
groupedbar(
data_predicted_cf_sum.nrow,
group = data_predicted_cf_sum.variable,
xlabel = "Number of hospitals in a city",
ylabel = "Number of cities",
bar_width = 0.67,
xticks = (1:5, 0:4),
lw = 0
)vcat(
combine(groupby(
data_predicted_agg, :variable
), :value => sum),
combine(groupby(
data_predicted_cf_agg, :variable
), :value => sum) |>
filter(:variable => (x -> x .== "CounterFactual"))
)3×2 DataFrame
| Row | variable | value_sum |
|---|---|---|
| String | Int64 | |
| 1 | Actual | 2246 |
| 2 | Predict | 2234 |
| 3 | CounterFactual | 2484 |
6.5.2 神経内科・外科を持たない病院のみ
ここでは、神経内科・外科を持たない病院のみを考え、MRIを所有する病院数の変化を見る。 神経内科・外科を持つ病院がMRIを所有するため、そうでない病院はMRIを所有することが少なくなっている。
data_predicted_agg_wo_neuro = combine(
groupby(
filter(
[:DepNeurology, :DepNeurosurgery] =>
((d1, d2) -> (d1 .!= 1) .& (d2 .!= 1)),
data_predicted
),
:CityCode
),
:MRIOwnDum => sum => :Actual,
:entryPred => sum => :Predict
);
data_predicted_agg_wo_neuro = stack(
data_predicted_agg_wo_neuro, [:Actual, :Predict]
);
data_predicted_sum_wo_neuro = combine(
groupby(data_predicted_agg_wo_neuro, [:variable, :value]),
nrow
);
data_predicted_cf_agg_wo_neuro = combine(
groupby(
filter(
[:DepNeurology, :DepNeurosurgery] =>
((d1, d2) -> (d1 .!= 1) .& (d2 .!= 1)),
data_predicted_cf
),
:CityCode
),
:MRIOwnDum => sum => :Actual,
:entryPred => sum => :CounterFactual
);
data_predicted_cf_agg_wo_neuro = stack(
data_predicted_cf_agg_wo_neuro,
[:Actual, :CounterFactual]
);
data_predicted_cf_sum_wo_neuro = combine(groupby(
vcat(
data_predicted_agg_wo_neuro,
data_predicted_cf_agg_wo_neuro[data_predicted_cf_agg_wo_neuro.variable .== "CounterFactual", :]
),
[:variable, :value]
), nrow)
transform!(
data_predicted_cf_sum_wo_neuro,
:variable =>
(x -> categorical(x; ordered = true, levels = ["Actual", "Predict", "CounterFactual"])) =>
:variable
)
sort!(data_predicted_cf_sum_wo_neuro, [:variable, :value]);
groupedbar(
data_predicted_cf_sum_wo_neuro.nrow,
group = data_predicted_cf_sum_wo_neuro.variable,
xlabel = "Number of hospitals in a city",
ylabel = "Number of cities",
bar_width = 0.67,
xticks = (1:5, 0:4),
lw = 0
)vcat(
combine(groupby(
data_predicted_agg_wo_neuro, :variable
), :value => sum),
combine(groupby(
data_predicted_cf_agg_wo_neuro, :variable
), :value => sum) |>
filter(:variable => (x -> x .== "CounterFactual"))
)3×2 DataFrame
| Row | variable | value_sum |
|---|---|---|
| String | Int64 | |
| 1 | Actual | 521 |
| 2 | Predict | 367 |
| 3 | CounterFactual | 340 |
data_predicted_cf[!, :Top25pct_NumBeds] = (
data_predicted_cf.LogNumBeds .>= log(1.2)
);
data_predicted_cf[!, :NotNeuro] = (
(data_predicted_cf.DepNeurology .!= 1) .&
(data_predicted_cf.DepNeurosurgery .!= 1)
);
tableActCounterFactual = vcat([
sum.(
eachcol(data_predicted_cf[
data_predicted_cf[:, variable] .== 1,
[:MRIOwnDum, :entryPred]
])
)'
for variable in listVarsShort
]...);
dfActCounterFactual = vcat(
DataFrame(
Actual = sum(data_predicted_cf.MRIOwnDum),
CounterFactual = sum(data_predicted_cf.entryPred)
),
DataFrame(
Actual = tableActCounterFactual[:, 1],
CounterFactual = tableActCounterFactual[:, 2]
)
);
dfActCounterFactual[:, :Predict] = dfActPred.Predict;
dfActCounterFactualStack = stack(
dfActCounterFactual,
[:Actual, :Predict, :CounterFactual]
);
dfActCounterFactualStack[:, :category] = repeat(["All"; listVarsShort], 3);
transform!(
dfActCounterFactualStack,
:variable =>
(
x -> categorical(
x;
ordered = true,
levels = ["Actual", "Predict", "CounterFactual"]
)
) =>
:variable
);
groupedbar(
dfActCounterFactualStack.category,
dfActCounterFactualStack.value,
group = dfActCounterFactualStack.variable,
xlabel = "Number of hospitals in a city",
ylabel = "Number of cities",
bar_width = 0.67,
lw = 0,
xrotation = 30
)