8  応用編

using CSV
using DataFrames
using LinearAlgebra
using GLM
using ForwardDiff
using Optim
using BenchmarkTools
using Plots
using Serialization
for file in readdir("functions/single_agent_dynamic/")
    include("functions/single_agent_dynamic/" * file)
end

8.1 前章で生成したデータの読み込み

data_gen = CSV.read("tmp/single_agent_dynamic_basic/data_gen.csv", DataFrame);
first(data_gen, 3)
3×9 DataFrame
Rowstateactionperiodconsumer_idpricemileageprice_lagmileage_lagaction_lag
Int64Int64Int64Int64Int64Int64Int64?Int64?Int64?
160481125000missingmissingmissing
2120482125005250000
31804831250010250050

8.2 前章で推定したパラメタの読み込み

kappa_est = deserialize("tmp/single_agent_dynamic_basic/kappa_est.ser");
lambda_est = deserialize("tmp/single_agent_dynamic_basic/lambda_est.ser");
theta_est = deserialize("tmp/single_agent_dynamic_basic/theta_est.ser");
theta_true = [0.004, 0.003];
beta = 0.99;
Euler_const = Float64(MathConstants.eulergamma);
num_choice = 2;
price_states = 2000:100:2500;
mileage_states = 0:5:100;
num_price_states = length(price_states);
num_mileage_states = length(mileage_states);
num_states = num_price_states * num_mileage_states;

states_matrix = Matrix(
  reshape(
    reinterpret(
      Int, 
      collect(Iterators.product(price_states, mileage_states))
      ), 
    (2, :))'
    );
describe(
  data_gen[:, [:price, :mileage, :action]], 
  :all
  )[:, [:variable, :mean, :std, :min, :max]]
3×5 DataFrame
Rowvariablemeanstdminmax
SymbolFloat64Float64Int64Int64
1price2322.63151.82620002500
2mileage33.019823.69240100
3action0.029150.16822801

8.3 二段階推定法

8.3.1 推定されたパラメタに基づくState transition matrixの生成

mileage_trans_mat_hat = generateMileageTransition(
  kappa_est, 
  num_mileage_states
  );

price_trans_mat_hat = generatePriceTransition(
  lambda_est, 
  num_price_states
);

trans_mat_hat = Array{Float64}(undef, num_states, num_states, num_choice);
for i in 1:num_choice
    trans_mat_hat[:, :, i] = kron(mileage_trans_mat_hat[:, :, i], price_trans_mat_hat);
end

8.3.2 CCPの推定

logit_model = glm(
  @formula(action ~ price + price^2 + mileage + mileage^2), data_gen, Binomial(), LogitLink()
  );
CCP_1st = hcat(
  1 .- predict(logit_model, DataFrame(price = states_matrix[:, 1], mileage = states_matrix[:, 2])), 
  predict(logit_model, DataFrame(price = states_matrix[:, 1], mileage = states_matrix[:, 2]))
  );
CCP_1st = convert(Matrix{Float64}, CCP_1st);

8.3.3 行列形式によるインバージョンを用いたパラメタの推定

@time mat_inv_opt_mat_inv = optimize(
    x -> - calculateLikelihoodFromCCP(
      x, 
      CCP_1st, 
      data_gen, 
      beta, 
      trans_mat_hat, 
      states_matrix, 
      calculateCCPByMatrixInversion
      ),
    theta_true,
    Optim.Options(show_trace = false)
);
  2.836205 seconds (72.91 M allocations: 1.533 GiB, 6.88% gc time, 38.60% compilation time)
theta_mat_inv = mat_inv_opt_mat_inv.minimizer;

hessian_mat_inv = ForwardDiff.hessian(
    x -> - calculateLikelihoodFromCCP(
      x, 
      CCP_1st, 
      data_gen, 
      beta, 
      trans_mat_hat, 
      states_matrix, 
      calculateCCPByMatrixInversion
      ),
    theta_mat_inv
    );

theta_se_mat_inv = sqrt.(diag(inv(hessian_mat_inv)));
DataFrame(theta_mat_inv = theta_mat_inv, theta_se_mat_inv = theta_se_mat_inv)
2×2 DataFrame
Rowtheta_mat_invtheta_se_mat_inv
Float64Float64
10.004009058.65792e-5
20.003002453.63389e-5

8.3.4 有限依存性を用いたパラメタの推定

@time finite_dep_opt = optimize(
    x -> - calculateLikelihoodFromCCP(
      x, 
      CCP_1st, 
      data_gen, 
      beta, 
      trans_mat_hat, 
      states_matrix, 
      calculateCCPByFiniteDependency
      ),
    theta_true,
    Optim.Options(show_trace = false)
);
  1.788700 seconds (66.94 M allocations: 1.205 GiB, 7.84% gc time, 8.41% compilation time)
theta_finite_dep = finite_dep_opt.minimizer;

hessian_finite_dep = ForwardDiff.hessian(
    x -> - calculateLikelihoodFromCCP(
      x,
      CCP_1st, 
      data_gen, 
      beta, 
      trans_mat_hat, 
      states_matrix, 
      calculateCCPByFiniteDependency
      ),
    theta_finite_dep
    );

theta_se_finite_dep = sqrt.(diag(inv(hessian_finite_dep)));
DataFrame(theta_finite_dep = theta_finite_dep, theta_se_finite_dep = theta_se_finite_dep)
2×2 DataFrame
Rowtheta_finite_deptheta_se_finite_dep
Float64Float64
10.004168910.000802626
20.002931792.2015e-5

8.3.5 推定方法の比較

@time nfxp_opt = optimize(
    x -> - calculateLikelihoodNFXP(x, data_gen, beta, trans_mat_hat, states_matrix),
    theta_true,
    Optim.Options(show_trace = false)
)
  4.879810 seconds (71.33 M allocations: 4.697 GiB, 10.64% gc time, 2.92% compilation time)
 * Status: success

 * Candidate solution
    Final objective value:     1.333722e+04

 * Found with
    Algorithm:     Nelder-Mead

 * Convergence measures
    √(Σ(yᵢ-ȳ)²)/n ≤ 1.0e-08

 * Work counters
    Seconds run:   5  (vs limit Inf)
    Iterations:    48
    f(x) calls:    94
theta_nfxp = nfxp_opt.minimizer;

hessian_nfxp = ForwardDiff.hessian(
    x -> - calculateLikelihoodNFXP(x, data_gen, beta, trans_mat_hat, states_matrix),
    theta_nfxp
    );

theta_se_nfxp = sqrt.(diag(inv(hessian_nfxp)));
DataFrame(theta_nfxp = theta_nfxp, theta_se_nfxp = theta_se_nfxp)
2×2 DataFrame
Rowtheta_nfxptheta_se_nfxp
Float64Float64
10.004018268.88336e-5
20.003004553.67953e-5
DataFrame(
  Algorithm = ["Matrix Inversion", "Finite Dependency", "NFXP"],
  theta_c = [theta_mat_inv[1], theta_finite_dep[1], theta_nfxp[1]],
  theta_se_c = [theta_se_mat_inv[1], theta_se_finite_dep[1], theta_se_nfxp[1]],
  theta_p = [theta_mat_inv[2], theta_finite_dep[2], theta_nfxp[2]],
  theta_se_p = [theta_se_mat_inv[2], theta_se_finite_dep[2], theta_se_nfxp[2]],
)
3×5 DataFrame
RowAlgorithmtheta_ctheta_se_ctheta_ptheta_se_p
StringFloat64Float64Float64Float64
1Matrix Inversion0.004009058.65792e-50.003002453.63389e-5
2Finite Dependency0.004168910.0008026260.002931792.2015e-5
3NFXP0.004018268.88336e-50.003004553.67953e-5

8.4 反実仮想分析 1: Every Day Low Price

CCP_dict = Dict();
prob_buy_baseline = calculateCCPByNFXP(theta_nfxp, beta, trans_mat_hat, states_matrix);
CCP_dict["Baseline"] = prob_buy_baseline;
result_df_edlp = combine(
  groupby(
    data_gen,
    [:state, :price]
  ),
  nrow => :num_obs
);
sort!(result_df_edlp, :state)
result_df_edlp[!, :prob_buy_baseline] = prob_buy_baseline[:, 2];

result_df_edlp = combine(
  groupby(
    result_df_edlp,
    [:price]
  ),
  [:prob_buy_baseline, :num_obs] =>
  (
    (prob_buy_baseline, num_obs) ->
    sum(prob_buy_baseline .* num_obs) / sum(num_obs)
  ) =>
  :prob_buy
);
G_fixed_price = generateMileageTransition(
  kappa_est, 
  num_mileage_states
);
for fixed_price in price_states
  states_matrix_fixed_price = (
    states_matrix[states_matrix[:, 1] .== fixed_price, :]
  )

  CCP_dict["edlp_" * string(fixed_price)] = calculateCCPByNFXP(
    theta_nfxp,
    beta,
    G_fixed_price,
    states_matrix_fixed_price;
    num_states = num_mileage_states
  )
end
result_df_edlp2000 = combine(
  groupby(
    data_gen,
    [:mileage]
  ),
  nrow => :num_obs
);
sort!(result_df_edlp2000, :mileage)
result_df_edlp2000[!, :prob_buy_baseline] = CCP_dict["edlp_2000"][:, 2];

prob_buy_edlp2000 = (
  sum(result_df_edlp2000.prob_buy_baseline .* result_df_edlp2000.num_obs) / 
  sum(result_df_edlp2000.num_obs)
);
bar(
    price_states,
    result_df_edlp.prob_buy,
    bar_width = 50, legend=false
)
hline!([prob_buy_edlp2000])

xlabel!("Price (thousand JPY)")
ylabel!("Purchase probability")

8.5 反実仮想分析 2: 永続的・一時的な値下げ

states_matrix_discount = states_matrix[:, :];
states_matrix_discount[:, 1] .-= 100;

CCP_dict["Permanent"] = calculateCCPByNFXP(theta_nfxp, beta, trans_mat_hat, states_matrix_discount);
U_discount = calculateFlowUtil(theta_nfxp, states_matrix_discount);

V = calculateVByContraction(theta_nfxp, beta, trans_mat_hat, states_matrix);

CV_temporary = U_discount + beta .* hcat(trans_mat_hat[:, :, 1] * V, trans_mat_hat[:, :, 2] * V);

CCP_dict["Temporary"] = exp.(CV_temporary) ./ sum(exp.(CV_temporary), dims = 2);
plot_cf_df = DataFrame(price = states_matrix[:, 1], mileage = states_matrix[:, 2]);
plot_cf_df[!, :ProbBaseline] = CCP_dict["Baseline"][:, 2];
plot_cf_df[!, :ProbPermanent] = CCP_dict["Permanent"][:, 2];
plot_cf_df[!, :ProbTemporary] = CCP_dict["Temporary"][:, 2];

filter!(:price => (x -> x == 2200), plot_cf_df);

plot(plot_cf_df.mileage, plot_cf_df.ProbBaseline, label = "Baseline")
plot!(plot_cf_df.mileage, plot_cf_df.ProbPermanent, label = "Permanent")
plot!(plot_cf_df.mileage, plot_cf_df.ProbTemporary, label = "Temporary")
xlabel!("Mileage (thousand km)")
ylabel!("Purchase probability")
consumer_dist_obs = combine(
  groupby(
    data_gen,
    [:state]
  ),
  nrow
)
transform!(
    consumer_dist_obs,
    :nrow => (x -> x / sum(x)) => :consumer_dist_obs
);
discount_scenario_vec = ["Baseline", "Permanent", "Temporary"];

G_CCP_dict = Dict();

for scenario in discount_scenario_vec
  G_CCP_dict[scenario] = (
    CCP_dict[scenario][:, 1] .* trans_mat_hat[:, :, 1] +
    CCP_dict[scenario][:, 2] .* trans_mat_hat[:, :, 2]
  );
end

for scenario in ["edlp_" * string(price) for price in price_states]
  G_CCP_dict[scenario] = (
    CCP_dict[scenario][:, 1] .* G_fixed_price[:, :, 1] +
    CCP_dict[scenario][:, 2] .* G_fixed_price[:, :, 2]
  );
end
num_consumer_sim = 1000;
num_period_sim = 20;
discount_sim_dict = Dict();

for scenario in discount_scenario_vec

  consumer_dist_sim = zeros((num_period_sim, num_states));
  consumer_dist_sim[1, :] .= consumer_dist_obs.consumer_dist_obs;

  prob_buy_sim_vec = zeros(num_period_sim);
  demand_vec = zeros(num_period_sim);
  revenue_vec = zeros(num_period_sim);

  for t in 1:num_period_sim

    if ((t == 1) & (scenario == "Temporary"))

      if (t != num_period_sim)
        consumer_dist_sim[t + 1, :] = consumer_dist_sim[t, :]' * G_CCP_dict[scenario];
      end

      prob_buy_sim_vec[t] = consumer_dist_sim[t, :]' * CCP_dict[scenario][:, 2];
      demand_vec[t] = prob_buy_sim_vec[t]' * num_consumer_sim;
      revenue_vec[t] = sum(
        (states_matrix[:, 1] .- 100) .* consumer_dist_sim[t, :] .* CCP_dict[scenario][:, 2] .* num_consumer_sim
      )

    else

      if (scenario == "Temporary")
        scenario_current = "Baseline";
        discount = 0;
      elseif (scenario == "Permanent")
        scenario_current = scenario;
        discount = 100;
      else
        scenario_current = scenario;
        discount = 0;
      end

      if (t != num_period_sim)
        consumer_dist_sim[t + 1, :] = consumer_dist_sim[t, :]' * G_CCP_dict[scenario_current];
      end

      prob_buy_sim_vec[t] = consumer_dist_sim[t, :]' * CCP_dict[scenario_current][:, 2];
      demand_vec[t] = prob_buy_sim_vec[t]' * num_consumer_sim;
      revenue_vec[t] = sum(
        (states_matrix[:, 1] .- discount) .* consumer_dist_sim[t, :] .* 
        CCP_dict[scenario_current][:, 2] .* num_consumer_sim
      )

    end

  end

  discount_sim_dict[scenario] = DataFrame(
    prob_buy_sim = prob_buy_sim_vec,
    demand = demand_vec,
    revenue = revenue_vec,
  )

end
plot(
  1:num_period_sim,
  (
    discount_sim_dict["Permanent"].prob_buy_sim - 
    discount_sim_dict["Baseline"].prob_buy_sim
  ),
  label = "Permanent Change"
)
plot!(
  1:num_period_sim,
  (
    discount_sim_dict["Temporary"].prob_buy_sim - 
    discount_sim_dict["Baseline"].prob_buy_sim
  ),
  label = "Temporary Change"
)

xlabel!("Period")
ylabel!("Change in purchase probability")

8.6 需要と収入の計算

consumer_dist_obs_edlp = combine(
  groupby(
    data_gen,
    [:mileage]
  ),
  nrow
)
transform!(
    consumer_dist_obs_edlp,
    :nrow => (x -> x / sum(x)) => :consumer_dist_obs
);
edlp_sim_dict = Dict();

for fixed_price in price_states

  scenario_edlp = "edlp_" * string(fixed_price);

  consumer_dist_sim = zeros((num_period_sim, num_mileage_states));
  consumer_dist_sim[1, :] .= consumer_dist_obs_edlp.consumer_dist_obs;

  prob_buy_sim_vec = zeros(num_period_sim);
  demand_vec = zeros(num_period_sim);
  revenue_vec = zeros(num_period_sim);

  for t in 1:num_period_sim

    if (t != num_period_sim)
      consumer_dist_sim[t + 1, :] = consumer_dist_sim[t, :]' * G_CCP_dict[scenario_edlp];
    end

    prob_buy_sim_vec[t] = consumer_dist_sim[t, :]' * CCP_dict[scenario_edlp][:, 2];
    demand_vec[t] = prob_buy_sim_vec[t]' * num_consumer_sim;
    revenue_vec[t] = sum(
      fixed_price .* consumer_dist_sim[t, :] .* 
      CCP_dict[scenario_edlp][:, 2] .* num_consumer_sim
    )

  end

  edlp_sim_dict[fixed_price] = DataFrame(
    prob_buy_sim = prob_buy_sim_vec,
    demand = demand_vec,
    revenue = revenue_vec,
  )

end
plot()
for fixed_price in price_states
  plot!(1:num_period_sim, edlp_sim_dict[fixed_price].demand, label = "Price: " * string(fixed_price))
end
current()
xlabel!("Period")
ylabel!("Demand")
plot()
for fixed_price in price_states
  plot!(1:num_period_sim, edlp_sim_dict[fixed_price].revenue, label = "Price: " * string(fixed_price))
end
current()
xlabel!("Period")
ylabel!("Revenue")
plot()
for scenario in ["Baseline", "Temporary", "Permanent"]
  plot!(
    1:num_period_sim,
    discount_sim_dict[scenario].demand,
    label = scenario
  )
end
current()
xlabel!("Period")
ylabel!("Demand")
plot()
for scenario in ["Baseline", "Temporary", "Permanent"]
  plot!(
    1:num_period_sim,
    discount_sim_dict[scenario].demand |> cumsum,
    label = scenario
  )
end
current()
xlabel!("Period")
ylabel!("Demand (cumulative sum)")
plot()
for scenario in ["Temporary", "Permanent"]
  plot!(
    1:num_period_sim,
    (discount_sim_dict[scenario].demand |> cumsum) ./ (discount_sim_dict["Baseline"].demand |> cumsum) .* 100,
    label = scenario
  )
end
current()
xlabel!("Period")
ylabel!("Demand (cumulative sum, relative to Baseline)")
plot()
for scenario in ["Baseline", "Temporary", "Permanent"]
  plot!(
    1:num_period_sim,
    discount_sim_dict[scenario].revenue,
    label = scenario
  )
end
current()
xlabel!("Period")
ylabel!("Revenue")
output_df = DataFrame(
  scenario = [
    ["Baseline", "Temporary", "Permanent"];
    ["edlp_" * string(price) for price in price_states]
  ],
  demand = [
    [
      discount_sim_dict[scenario].demand |> sum
      for scenario in ["Baseline", "Temporary", "Permanent"]
    ]; [
      edlp_sim_dict[price].demand |> sum
      for price in price_states
    ]
  ],
  revenue = [
    [
      discount_sim_dict[scenario].revenue .* (beta .^((1:num_period_sim) .- 1)) |> sum
      for scenario in ["Baseline", "Temporary", "Permanent"]
    ]; [
      edlp_sim_dict[price].revenue .* (beta .^((1:num_period_sim) .- 1)) |> sum
      for price in price_states
    ]
  ]
)

output_df[!, :per_rev] = output_df[:, :revenue] ./ output_df[:, :demand] ./ 10;

output_df
9×4 DataFrame
Rowscenariodemandrevenueper_rev
StringFloat64Float64Float64
1Baseline583.3251.19795e6205.365
2Temporary586.1781.20157e6204.984
3Permanent623.7681.22583e6196.52
4edlp_2000711.8081.30044e6182.695
5edlp_2100664.111.27258e6191.622
6edlp_2200620.5181.24426e6200.519
7edlp_2300580.3191.21512e6209.388
8edlp_2400542.9541.18489e6218.23
9edlp_2500507.9811.15335e6227.046