using CSV
using DataFrames
using LinearAlgebra
using GLM
using ForwardDiff
using Optim
using BenchmarkTools
using Plots
using Serialization8 応用編
for file in readdir("functions/single_agent_dynamic/")
include("functions/single_agent_dynamic/" * file)
end8.1 前章で生成したデータの読み込み
data_gen = CSV.read("tmp/single_agent_dynamic_basic/data_gen.csv", DataFrame);
first(data_gen, 3)3×9 DataFrame
| Row | state | action | period | consumer_id | price | mileage | price_lag | mileage_lag | action_lag |
|---|---|---|---|---|---|---|---|---|---|
| Int64 | Int64 | Int64 | Int64 | Int64 | Int64 | Int64? | Int64? | Int64? | |
| 1 | 6 | 0 | 481 | 1 | 2500 | 0 | missing | missing | missing |
| 2 | 12 | 0 | 482 | 1 | 2500 | 5 | 2500 | 0 | 0 |
| 3 | 18 | 0 | 483 | 1 | 2500 | 10 | 2500 | 5 | 0 |
8.2 前章で推定したパラメタの読み込み
kappa_est = deserialize("tmp/single_agent_dynamic_basic/kappa_est.ser");
lambda_est = deserialize("tmp/single_agent_dynamic_basic/lambda_est.ser");
theta_est = deserialize("tmp/single_agent_dynamic_basic/theta_est.ser");theta_true = [0.004, 0.003];
beta = 0.99;
Euler_const = Float64(MathConstants.eulergamma);
num_choice = 2;price_states = 2000:100:2500;
mileage_states = 0:5:100;
num_price_states = length(price_states);
num_mileage_states = length(mileage_states);
num_states = num_price_states * num_mileage_states;
states_matrix = Matrix(
reshape(
reinterpret(
Int,
collect(Iterators.product(price_states, mileage_states))
),
(2, :))'
);describe(
data_gen[:, [:price, :mileage, :action]],
:all
)[:, [:variable, :mean, :std, :min, :max]]3×5 DataFrame
| Row | variable | mean | std | min | max |
|---|---|---|---|---|---|
| Symbol | Float64 | Float64 | Int64 | Int64 | |
| 1 | price | 2322.63 | 151.826 | 2000 | 2500 |
| 2 | mileage | 33.0198 | 23.6924 | 0 | 100 |
| 3 | action | 0.02915 | 0.168228 | 0 | 1 |
8.3 二段階推定法
8.3.1 推定されたパラメタに基づくState transition matrixの生成
mileage_trans_mat_hat = generateMileageTransition(
kappa_est,
num_mileage_states
);
price_trans_mat_hat = generatePriceTransition(
lambda_est,
num_price_states
);
trans_mat_hat = Array{Float64}(undef, num_states, num_states, num_choice);
for i in 1:num_choice
trans_mat_hat[:, :, i] = kron(mileage_trans_mat_hat[:, :, i], price_trans_mat_hat);
end8.3.2 CCPの推定
logit_model = glm(
@formula(action ~ price + price^2 + mileage + mileage^2), data_gen, Binomial(), LogitLink()
);CCP_1st = hcat(
1 .- predict(logit_model, DataFrame(price = states_matrix[:, 1], mileage = states_matrix[:, 2])),
predict(logit_model, DataFrame(price = states_matrix[:, 1], mileage = states_matrix[:, 2]))
);
CCP_1st = convert(Matrix{Float64}, CCP_1st);8.3.3 行列形式によるインバージョンを用いたパラメタの推定
@time mat_inv_opt_mat_inv = optimize(
x -> - calculateLikelihoodFromCCP(
x,
CCP_1st,
data_gen,
beta,
trans_mat_hat,
states_matrix,
calculateCCPByMatrixInversion
),
theta_true,
Optim.Options(show_trace = false)
); 2.836205 seconds (72.91 M allocations: 1.533 GiB, 6.88% gc time, 38.60% compilation time)
theta_mat_inv = mat_inv_opt_mat_inv.minimizer;
hessian_mat_inv = ForwardDiff.hessian(
x -> - calculateLikelihoodFromCCP(
x,
CCP_1st,
data_gen,
beta,
trans_mat_hat,
states_matrix,
calculateCCPByMatrixInversion
),
theta_mat_inv
);
theta_se_mat_inv = sqrt.(diag(inv(hessian_mat_inv)));DataFrame(theta_mat_inv = theta_mat_inv, theta_se_mat_inv = theta_se_mat_inv)2×2 DataFrame
| Row | theta_mat_inv | theta_se_mat_inv |
|---|---|---|
| Float64 | Float64 | |
| 1 | 0.00400905 | 8.65792e-5 |
| 2 | 0.00300245 | 3.63389e-5 |
8.3.4 有限依存性を用いたパラメタの推定
@time finite_dep_opt = optimize(
x -> - calculateLikelihoodFromCCP(
x,
CCP_1st,
data_gen,
beta,
trans_mat_hat,
states_matrix,
calculateCCPByFiniteDependency
),
theta_true,
Optim.Options(show_trace = false)
); 1.788700 seconds (66.94 M allocations: 1.205 GiB, 7.84% gc time, 8.41% compilation time)
theta_finite_dep = finite_dep_opt.minimizer;
hessian_finite_dep = ForwardDiff.hessian(
x -> - calculateLikelihoodFromCCP(
x,
CCP_1st,
data_gen,
beta,
trans_mat_hat,
states_matrix,
calculateCCPByFiniteDependency
),
theta_finite_dep
);
theta_se_finite_dep = sqrt.(diag(inv(hessian_finite_dep)));DataFrame(theta_finite_dep = theta_finite_dep, theta_se_finite_dep = theta_se_finite_dep)2×2 DataFrame
| Row | theta_finite_dep | theta_se_finite_dep |
|---|---|---|
| Float64 | Float64 | |
| 1 | 0.00416891 | 0.000802626 |
| 2 | 0.00293179 | 2.2015e-5 |
8.3.5 推定方法の比較
@time nfxp_opt = optimize(
x -> - calculateLikelihoodNFXP(x, data_gen, beta, trans_mat_hat, states_matrix),
theta_true,
Optim.Options(show_trace = false)
) 4.879810 seconds (71.33 M allocations: 4.697 GiB, 10.64% gc time, 2.92% compilation time)
* Status: success
* Candidate solution
Final objective value: 1.333722e+04
* Found with
Algorithm: Nelder-Mead
* Convergence measures
√(Σ(yᵢ-ȳ)²)/n ≤ 1.0e-08
* Work counters
Seconds run: 5 (vs limit Inf)
Iterations: 48
f(x) calls: 94
theta_nfxp = nfxp_opt.minimizer;
hessian_nfxp = ForwardDiff.hessian(
x -> - calculateLikelihoodNFXP(x, data_gen, beta, trans_mat_hat, states_matrix),
theta_nfxp
);
theta_se_nfxp = sqrt.(diag(inv(hessian_nfxp)));DataFrame(theta_nfxp = theta_nfxp, theta_se_nfxp = theta_se_nfxp)2×2 DataFrame
| Row | theta_nfxp | theta_se_nfxp |
|---|---|---|
| Float64 | Float64 | |
| 1 | 0.00401826 | 8.88336e-5 |
| 2 | 0.00300455 | 3.67953e-5 |
DataFrame(
Algorithm = ["Matrix Inversion", "Finite Dependency", "NFXP"],
theta_c = [theta_mat_inv[1], theta_finite_dep[1], theta_nfxp[1]],
theta_se_c = [theta_se_mat_inv[1], theta_se_finite_dep[1], theta_se_nfxp[1]],
theta_p = [theta_mat_inv[2], theta_finite_dep[2], theta_nfxp[2]],
theta_se_p = [theta_se_mat_inv[2], theta_se_finite_dep[2], theta_se_nfxp[2]],
)3×5 DataFrame
| Row | Algorithm | theta_c | theta_se_c | theta_p | theta_se_p |
|---|---|---|---|---|---|
| String | Float64 | Float64 | Float64 | Float64 | |
| 1 | Matrix Inversion | 0.00400905 | 8.65792e-5 | 0.00300245 | 3.63389e-5 |
| 2 | Finite Dependency | 0.00416891 | 0.000802626 | 0.00293179 | 2.2015e-5 |
| 3 | NFXP | 0.00401826 | 8.88336e-5 | 0.00300455 | 3.67953e-5 |
8.4 反実仮想分析 1: Every Day Low Price
CCP_dict = Dict();prob_buy_baseline = calculateCCPByNFXP(theta_nfxp, beta, trans_mat_hat, states_matrix);
CCP_dict["Baseline"] = prob_buy_baseline;result_df_edlp = combine(
groupby(
data_gen,
[:state, :price]
),
nrow => :num_obs
);
sort!(result_df_edlp, :state)
result_df_edlp[!, :prob_buy_baseline] = prob_buy_baseline[:, 2];
result_df_edlp = combine(
groupby(
result_df_edlp,
[:price]
),
[:prob_buy_baseline, :num_obs] =>
(
(prob_buy_baseline, num_obs) ->
sum(prob_buy_baseline .* num_obs) / sum(num_obs)
) =>
:prob_buy
);G_fixed_price = generateMileageTransition(
kappa_est,
num_mileage_states
);for fixed_price in price_states
states_matrix_fixed_price = (
states_matrix[states_matrix[:, 1] .== fixed_price, :]
)
CCP_dict["edlp_" * string(fixed_price)] = calculateCCPByNFXP(
theta_nfxp,
beta,
G_fixed_price,
states_matrix_fixed_price;
num_states = num_mileage_states
)
endresult_df_edlp2000 = combine(
groupby(
data_gen,
[:mileage]
),
nrow => :num_obs
);
sort!(result_df_edlp2000, :mileage)
result_df_edlp2000[!, :prob_buy_baseline] = CCP_dict["edlp_2000"][:, 2];
prob_buy_edlp2000 = (
sum(result_df_edlp2000.prob_buy_baseline .* result_df_edlp2000.num_obs) /
sum(result_df_edlp2000.num_obs)
);bar(
price_states,
result_df_edlp.prob_buy,
bar_width = 50, legend=false
)
hline!([prob_buy_edlp2000])
xlabel!("Price (thousand JPY)")
ylabel!("Purchase probability")8.5 反実仮想分析 2: 永続的・一時的な値下げ
states_matrix_discount = states_matrix[:, :];
states_matrix_discount[:, 1] .-= 100;
CCP_dict["Permanent"] = calculateCCPByNFXP(theta_nfxp, beta, trans_mat_hat, states_matrix_discount);U_discount = calculateFlowUtil(theta_nfxp, states_matrix_discount);
V = calculateVByContraction(theta_nfxp, beta, trans_mat_hat, states_matrix);
CV_temporary = U_discount + beta .* hcat(trans_mat_hat[:, :, 1] * V, trans_mat_hat[:, :, 2] * V);
CCP_dict["Temporary"] = exp.(CV_temporary) ./ sum(exp.(CV_temporary), dims = 2);plot_cf_df = DataFrame(price = states_matrix[:, 1], mileage = states_matrix[:, 2]);
plot_cf_df[!, :ProbBaseline] = CCP_dict["Baseline"][:, 2];
plot_cf_df[!, :ProbPermanent] = CCP_dict["Permanent"][:, 2];
plot_cf_df[!, :ProbTemporary] = CCP_dict["Temporary"][:, 2];
filter!(:price => (x -> x == 2200), plot_cf_df);
plot(plot_cf_df.mileage, plot_cf_df.ProbBaseline, label = "Baseline")
plot!(plot_cf_df.mileage, plot_cf_df.ProbPermanent, label = "Permanent")
plot!(plot_cf_df.mileage, plot_cf_df.ProbTemporary, label = "Temporary")
xlabel!("Mileage (thousand km)")
ylabel!("Purchase probability")consumer_dist_obs = combine(
groupby(
data_gen,
[:state]
),
nrow
)
transform!(
consumer_dist_obs,
:nrow => (x -> x / sum(x)) => :consumer_dist_obs
);discount_scenario_vec = ["Baseline", "Permanent", "Temporary"];
G_CCP_dict = Dict();
for scenario in discount_scenario_vec
G_CCP_dict[scenario] = (
CCP_dict[scenario][:, 1] .* trans_mat_hat[:, :, 1] +
CCP_dict[scenario][:, 2] .* trans_mat_hat[:, :, 2]
);
end
for scenario in ["edlp_" * string(price) for price in price_states]
G_CCP_dict[scenario] = (
CCP_dict[scenario][:, 1] .* G_fixed_price[:, :, 1] +
CCP_dict[scenario][:, 2] .* G_fixed_price[:, :, 2]
);
endnum_consumer_sim = 1000;
num_period_sim = 20;discount_sim_dict = Dict();
for scenario in discount_scenario_vec
consumer_dist_sim = zeros((num_period_sim, num_states));
consumer_dist_sim[1, :] .= consumer_dist_obs.consumer_dist_obs;
prob_buy_sim_vec = zeros(num_period_sim);
demand_vec = zeros(num_period_sim);
revenue_vec = zeros(num_period_sim);
for t in 1:num_period_sim
if ((t == 1) & (scenario == "Temporary"))
if (t != num_period_sim)
consumer_dist_sim[t + 1, :] = consumer_dist_sim[t, :]' * G_CCP_dict[scenario];
end
prob_buy_sim_vec[t] = consumer_dist_sim[t, :]' * CCP_dict[scenario][:, 2];
demand_vec[t] = prob_buy_sim_vec[t]' * num_consumer_sim;
revenue_vec[t] = sum(
(states_matrix[:, 1] .- 100) .* consumer_dist_sim[t, :] .* CCP_dict[scenario][:, 2] .* num_consumer_sim
)
else
if (scenario == "Temporary")
scenario_current = "Baseline";
discount = 0;
elseif (scenario == "Permanent")
scenario_current = scenario;
discount = 100;
else
scenario_current = scenario;
discount = 0;
end
if (t != num_period_sim)
consumer_dist_sim[t + 1, :] = consumer_dist_sim[t, :]' * G_CCP_dict[scenario_current];
end
prob_buy_sim_vec[t] = consumer_dist_sim[t, :]' * CCP_dict[scenario_current][:, 2];
demand_vec[t] = prob_buy_sim_vec[t]' * num_consumer_sim;
revenue_vec[t] = sum(
(states_matrix[:, 1] .- discount) .* consumer_dist_sim[t, :] .*
CCP_dict[scenario_current][:, 2] .* num_consumer_sim
)
end
end
discount_sim_dict[scenario] = DataFrame(
prob_buy_sim = prob_buy_sim_vec,
demand = demand_vec,
revenue = revenue_vec,
)
endplot(
1:num_period_sim,
(
discount_sim_dict["Permanent"].prob_buy_sim -
discount_sim_dict["Baseline"].prob_buy_sim
),
label = "Permanent Change"
)
plot!(
1:num_period_sim,
(
discount_sim_dict["Temporary"].prob_buy_sim -
discount_sim_dict["Baseline"].prob_buy_sim
),
label = "Temporary Change"
)
xlabel!("Period")
ylabel!("Change in purchase probability")8.6 需要と収入の計算
consumer_dist_obs_edlp = combine(
groupby(
data_gen,
[:mileage]
),
nrow
)
transform!(
consumer_dist_obs_edlp,
:nrow => (x -> x / sum(x)) => :consumer_dist_obs
);edlp_sim_dict = Dict();
for fixed_price in price_states
scenario_edlp = "edlp_" * string(fixed_price);
consumer_dist_sim = zeros((num_period_sim, num_mileage_states));
consumer_dist_sim[1, :] .= consumer_dist_obs_edlp.consumer_dist_obs;
prob_buy_sim_vec = zeros(num_period_sim);
demand_vec = zeros(num_period_sim);
revenue_vec = zeros(num_period_sim);
for t in 1:num_period_sim
if (t != num_period_sim)
consumer_dist_sim[t + 1, :] = consumer_dist_sim[t, :]' * G_CCP_dict[scenario_edlp];
end
prob_buy_sim_vec[t] = consumer_dist_sim[t, :]' * CCP_dict[scenario_edlp][:, 2];
demand_vec[t] = prob_buy_sim_vec[t]' * num_consumer_sim;
revenue_vec[t] = sum(
fixed_price .* consumer_dist_sim[t, :] .*
CCP_dict[scenario_edlp][:, 2] .* num_consumer_sim
)
end
edlp_sim_dict[fixed_price] = DataFrame(
prob_buy_sim = prob_buy_sim_vec,
demand = demand_vec,
revenue = revenue_vec,
)
endplot()
for fixed_price in price_states
plot!(1:num_period_sim, edlp_sim_dict[fixed_price].demand, label = "Price: " * string(fixed_price))
end
current()
xlabel!("Period")
ylabel!("Demand")plot()
for fixed_price in price_states
plot!(1:num_period_sim, edlp_sim_dict[fixed_price].revenue, label = "Price: " * string(fixed_price))
end
current()
xlabel!("Period")
ylabel!("Revenue")plot()
for scenario in ["Baseline", "Temporary", "Permanent"]
plot!(
1:num_period_sim,
discount_sim_dict[scenario].demand,
label = scenario
)
end
current()
xlabel!("Period")
ylabel!("Demand")plot()
for scenario in ["Baseline", "Temporary", "Permanent"]
plot!(
1:num_period_sim,
discount_sim_dict[scenario].demand |> cumsum,
label = scenario
)
end
current()
xlabel!("Period")
ylabel!("Demand (cumulative sum)")plot()
for scenario in ["Temporary", "Permanent"]
plot!(
1:num_period_sim,
(discount_sim_dict[scenario].demand |> cumsum) ./ (discount_sim_dict["Baseline"].demand |> cumsum) .* 100,
label = scenario
)
end
current()
xlabel!("Period")
ylabel!("Demand (cumulative sum, relative to Baseline)")plot()
for scenario in ["Baseline", "Temporary", "Permanent"]
plot!(
1:num_period_sim,
discount_sim_dict[scenario].revenue,
label = scenario
)
end
current()
xlabel!("Period")
ylabel!("Revenue")output_df = DataFrame(
scenario = [
["Baseline", "Temporary", "Permanent"];
["edlp_" * string(price) for price in price_states]
],
demand = [
[
discount_sim_dict[scenario].demand |> sum
for scenario in ["Baseline", "Temporary", "Permanent"]
]; [
edlp_sim_dict[price].demand |> sum
for price in price_states
]
],
revenue = [
[
discount_sim_dict[scenario].revenue .* (beta .^((1:num_period_sim) .- 1)) |> sum
for scenario in ["Baseline", "Temporary", "Permanent"]
]; [
edlp_sim_dict[price].revenue .* (beta .^((1:num_period_sim) .- 1)) |> sum
for price in price_states
]
]
)
output_df[!, :per_rev] = output_df[:, :revenue] ./ output_df[:, :demand] ./ 10;
output_df9×4 DataFrame
| Row | scenario | demand | revenue | per_rev |
|---|---|---|---|---|
| String | Float64 | Float64 | Float64 | |
| 1 | Baseline | 583.325 | 1.19795e6 | 205.365 |
| 2 | Temporary | 586.178 | 1.20157e6 | 204.984 |
| 3 | Permanent | 623.768 | 1.22583e6 | 196.52 |
| 4 | edlp_2000 | 711.808 | 1.30044e6 | 182.695 |
| 5 | edlp_2100 | 664.11 | 1.27258e6 | 191.622 |
| 6 | edlp_2200 | 620.518 | 1.24426e6 | 200.519 |
| 7 | edlp_2300 | 580.319 | 1.21512e6 | 209.388 |
| 8 | edlp_2400 | 542.954 | 1.18489e6 | 218.23 |
| 9 | edlp_2500 | 507.981 | 1.15335e6 | 227.046 |