using CSV
using LinearAlgebra
using Random
using StatsBase
using Distributions
using DataFrames
using Plots
using ShiftedArrays
using ForwardDiff
using Optim
using Serialization7 基礎編
for file in readdir("functions/single_agent_dynamic/")
include("functions/single_agent_dynamic/" * file)
end7.1 パラメタの設定
theta_true = [0.004, 0.003];
beta = 0.99;
Euler_const = Float64(MathConstants.eulergamma);
num_choice = 2;price_states = 2000:100:2500;
mileage_states = 0:5:100;
num_price_states = length(price_states);
num_mileage_states = length(mileage_states);
num_states = num_price_states * num_mileage_states;
states_matrix = Matrix(
reshape(
reinterpret(
Int,
collect(Iterators.product(price_states, mileage_states))
),
(2, :)
)'
);kappa_true = [0.25, 0.05];
mileage_trans_mat_true = generateMileageTransition(
kappa_true,
num_mileage_states
);
mileage_trans_mat_true[1:4, 1:4, 1]4×4 Matrix{Float64}:
0.7 0.25 0.05 0.0
0.0 0.7 0.25 0.05
0.0 0.0 0.7 0.25
0.0 0.0 0.0 0.7
lambda_true = [
0.1, 0.2, 0.2, 0.2, 0.2,
0.1, 0.2, 0.2, 0.2, 0.2,
0.1, 0.1, 0.2, 0.2, 0.1,
0.1, 0.1, 0.2, 0.2, 0.1,
0.05, 0.05, 0.1, 0.1, 0.2,
0.05, 0.05, 0.1, 0.1, 0.2
];
price_trans_mat_true = generatePriceTransition(
lambda_true,
num_price_states
);trans_mat_true = Array{Float64}(
undef, num_states, num_states, num_choice
);
for i in 1:num_choice
trans_mat_true[:, :, i] = kron(
mileage_trans_mat_true[:, :, i],
price_trans_mat_true
);
endprice_trans_eigen = eigvecs(price_trans_mat_true');
price_dist_steady = (
price_trans_eigen[:, end] / sum(price_trans_eigen[:, end])
)6-element Vector{Float64}:
0.07377049180327863
0.07377049180327883
0.1639344262295082
0.16393442622950818
0.28571428571428564
0.23887587822014061
@time EV_true = calculateEV(
theta_true, beta, trans_mat_true, states_matrix
); 0.727990 seconds (2.26 M allocations: 202.622 MiB, 9.86% gc time, 90.73% compilation time)
U_true = calculateFlowUtil(theta_true, states_matrix);
V_CS_true = U_true + beta .* EV_true;prob_buy_true_mat = reshape(
exp.(V_CS_true[:, 2]) ./ sum(exp.(V_CS_true), dims = 2),
(num_price_states, num_mileage_states)
);7.2 データの生成
num_consumer = 1000;
num_period = 12 * 50;
num_period_obs = 12 * 10;
num_obs = num_consumer * num_period;
Random.seed!(42);data_gen = reduce(vcat, [generateData(
consumer_id,
V_CS_true,
trans_mat_true,
price_dist_steady
) for consumer_id in 1:num_consumer]) |>
filter(:period => (x -> x > num_period - num_period_obs))
data_gen[!, :price] = states_matrix[data_gen.state, 1];
data_gen[!, :mileage] = states_matrix[data_gen.state, 2];describe(
data_gen[:, [:price, :mileage, :action]], :all
)[:, [:variable, :mean, :std, :min, :max]]3×5 DataFrame
| Row | variable | mean | std | min | max |
|---|---|---|---|---|---|
| Symbol | Float64 | Float64 | Int64 | Int64 | |
| 1 | price | 2322.63 | 151.826 | 2000 | 2500 |
| 2 | mileage | 33.0198 | 23.6924 | 0 | 100 |
| 3 | action | 0.02915 | 0.168228 | 0 | 1 |
histogram(data_gen.price, bar_width = 50, legend=false)
xlabel!("Price (thousand JPY)")
ylabel!("Frequency")histogram(
data_gen.mileage,
bar_width = 3,
legend=false,
bins = [collect(mileage_states); [105]] .- 2.5
)
xlabel!("Mileage (thousand km)")
ylabel!("Frequency")bar(
price_states,
combine(groupby(data_gen, :price), :action => mean).action_mean,
bar_width = 50,
legend=false
)
xlabel!("Price (thousand JPY)")
ylabel!("Purchase probability")bar(
mileage_states,
combine(groupby(data_gen, :mileage), :action => mean).action_mean,
bar_width = 3,
legend=false
)
xlabel!("Mileage (thousand km)")
ylabel!("Purchase probability")次の図は「観測された条件付き購入確率」を示す。 元のサポートサイトでは3Dプロットを使って示されていたが、Juliaではいい感じの3Dプロットを書くのが難しかったため、代わりにヒートマップで示している。
heatmap(
mileage_states,
price_states,
reshape(
combine(
groupby(data_gen, [:mileage, :price]),
:action => mean
).action_mean,
(num_price_states, num_mileage_states)
),
xlabel = "Mileage",
ylabel = "Price",
title = "Conditional probability of buying"
)7.3 遷移行列の推定
transform!(
groupby(data_gen, :consumer_id),
[:price, :mileage, :action] .=> ShiftedArrays.lag
);num_cond_obs_mileage = combine(
groupby(
transform(
data_gen |>
filter(
:period =>
(x -> x != (num_period - num_period_obs + 1))
),
[:mileage_lag, :mileage, :action_lag] =>
ByRow(
(mileage_lag, mileage, action_lag) ->
(
(
(action_lag == 0) &
(5 <= mileage_lag <= 95) &
(mileage_lag == mileage)
) |
((action_lag == 1) & (mileage == 0))
) ? "cond_obs_mileage1" :
(
(
(action_lag == 0) &
(5 <= mileage_lag <= 90) &
(mileage_lag == mileage - 5)
) |
((action_lag == 1) & (mileage == 5))
) ? "cond_obs_mileage2" :
(
(
(action_lag == 0) &
(5 <= mileage_lag <= 90) &
(mileage_lag == mileage - 10)
) |
((action_lag == 1) & (mileage == 10))
) ? "cond_obs_mileage3" :
(
(
(action_lag == 0) &
(mileage_lag == 95) &
(mileage == 100)
)
) ? "cond_obs_mileage4" :
"other"
) =>
:cond_obs_mileage
),
[:cond_obs_mileage]
),
nrow => :num_cond_obs
) |> filter(:cond_obs_mileage => (x -> (x != "other")));
num_cond_obs_mileage = Dict(
k => v[1, "num_cond_obs"]
for ((k, ), v) in pairs(groupby(num_cond_obs_mileage, :cond_obs_mileage))
);kappa_est = zeros(2);
kappa_est[1] = num_cond_obs_mileage["cond_obs_mileage2"] * (
num_cond_obs_mileage["cond_obs_mileage2"] +
num_cond_obs_mileage["cond_obs_mileage3"] +
num_cond_obs_mileage["cond_obs_mileage4"]
) / (
(
num_cond_obs_mileage["cond_obs_mileage2"] +
num_cond_obs_mileage["cond_obs_mileage3"]
) * (
num_cond_obs_mileage["cond_obs_mileage1"] +
num_cond_obs_mileage["cond_obs_mileage2"] +
num_cond_obs_mileage["cond_obs_mileage3"] +
num_cond_obs_mileage["cond_obs_mileage4"]
)
);
kappa_est[2] = num_cond_obs_mileage["cond_obs_mileage3"] * (
num_cond_obs_mileage["cond_obs_mileage2"] +
num_cond_obs_mileage["cond_obs_mileage3"] +
num_cond_obs_mileage["cond_obs_mileage4"]
) / (
(
num_cond_obs_mileage["cond_obs_mileage2"] +
num_cond_obs_mileage["cond_obs_mileage3"]
) * (
num_cond_obs_mileage["cond_obs_mileage1"] +
num_cond_obs_mileage["cond_obs_mileage2"] +
num_cond_obs_mileage["cond_obs_mileage3"] +
num_cond_obs_mileage["cond_obs_mileage4"]
)
);
kappa_est2-element Vector{Float64}:
0.2507060588583926
0.05038686722769128
Infomat_mileage_est = zeros((2, 2));
Infomat_mileage_est[1, 1] = (
(
num_cond_obs_mileage["cond_obs_mileage1"] /
(1 - kappa_est[1] - kappa_est[2])^2
) +
(num_cond_obs_mileage["cond_obs_mileage2"] / kappa_est[1]^2) +
(
num_cond_obs_mileage["cond_obs_mileage4"] /
(kappa_est[1] + kappa_est[2])^2
)
);
Infomat_mileage_est[1, 2] = (
(
num_cond_obs_mileage["cond_obs_mileage1"] /
(1 - kappa_est[1] - kappa_est[2])^2
) +
(
num_cond_obs_mileage["cond_obs_mileage4"] /
(kappa_est[1] + kappa_est[2])^2
)
);
Infomat_mileage_est[2, 1] = Infomat_mileage_est[1, 2];
Infomat_mileage_est[2, 2] = (
(
num_cond_obs_mileage["cond_obs_mileage1"] /
(1 - kappa_est[1] - kappa_est[2])^2
) +
(num_cond_obs_mileage["cond_obs_mileage3"] / kappa_est[2]^2) +
(
num_cond_obs_mileage["cond_obs_mileage4"] /
(kappa_est[1] + kappa_est[2])^2
)
);
kappa_se = sqrt.(diag(inv(Infomat_mileage_est)));DataFrame(kappa_est = kappa_est, kappa_se = kappa_se)2×2 DataFrame
| Row | kappa_est | kappa_se |
|---|---|---|
| Float64 | Float64 | |
| 1 | 0.250706 | 0.0013098 |
| 2 | 0.0503869 | 0.000662066 |
num_cond_obs_price = combine(
groupby(
data_gen |>
filter(
:period =>
(x -> x != (num_period - num_period_obs + 1))
),
[:price_lag, :price]
),
nrow => :num_cond_obs
)
num_cond_obs_price = [
(
num_cond_obs_price |>
filter(
[:price_lag, :price] =>
(
(price_lag, price) ->
((price_lag == p_lag) & (price == p))
)
)
)[1, :num_cond_obs]
for p_lag in price_states, p in price_states
]
lambda_est_mat = num_cond_obs_price ./ sum(num_cond_obs_price, dims = 2);
lambda_est = lambda_est_mat'[lambda_est_mat' .!= diag(lambda_est_mat)];lambda_se = vcat([
sqrt.(diag(inv(
(
diagm(num_cond_obs_price[i, :])[Not(i), Not(i)] ./
lambda_est_mat[Not(i), Not(i)].^2
) .+ (
num_cond_obs_price[i, i] /
lambda_est_mat[i, i]^2
)
))) for i in 1:num_price_states
]...)30-element Vector{Float64}:
0.003331377707369882
0.00671087202774432
0.006726990888800794
0.009561549640256135
0.009611051522479718
0.003237157226120388
0.006683210833495453
0.006696372126260586
0.009621285444288651
0.009781740363963828
0.002223144478519486
0.0022780260513659977
0.004516183865925486
⋮
0.006813746538738526
0.007677243174949639
0.002356517399713087
0.0023843478474719036
0.0045057964689017155
0.0045121986971131695
0.005002569765090251
0.0026110065055348717
0.0025918295553672807
0.004885082439414729
0.004962738202957978
0.005515586477784162
DataFrame(
lambda_est = lambda_est,
lambda_se = lambda_se
)30×2 DataFrame
5 rows omitted
| Row | lambda_est | lambda_se |
|---|---|---|
| Float64 | Float64 | |
| 1 | 0.101205 | 0.00333138 |
| 2 | 0.196952 | 0.00671087 |
| 3 | 0.198431 | 0.00672699 |
| 4 | 0.205254 | 0.00956155 |
| 5 | 0.198886 | 0.00961105 |
| 6 | 0.104459 | 0.00323716 |
| 7 | 0.200023 | 0.00668321 |
| 8 | 0.201733 | 0.00669637 |
| 9 | 0.20447 | 0.00962129 |
| 10 | 0.188505 | 0.00978174 |
| 11 | 0.0998312 | 0.00222314 |
| 12 | 0.0979389 | 0.00227803 |
| 13 | 0.205646 | 0.00451618 |
| ⋮ | ⋮ | ⋮ |
| 19 | 0.200477 | 0.00681375 |
| 20 | 0.101128 | 0.00767724 |
| 21 | 0.0495297 | 0.00235652 |
| 22 | 0.0498236 | 0.00238435 |
| 23 | 0.1 | 0.0045058 |
| 24 | 0.100911 | 0.0045122 |
| 25 | 0.199265 | 0.00500257 |
| 26 | 0.0486584 | 0.00261101 |
| 27 | 0.0509654 | 0.00259183 |
| 28 | 0.103705 | 0.00488508 |
| 29 | 0.100511 | 0.00496274 |
| 30 | 0.198112 | 0.00551559 |
7.4 パラメタの推定
7.4.1 静学的なロジットによる推定
@time logit_stat_opt = optimize(
x -> - calculateLogLikelihoodStatic(x, states_matrix, data_gen),
theta_true,
Optim.Options(show_trace = false)
) 2.115396 seconds (71.91 M allocations: 1.319 GiB, 6.80% gc time, 21.10% compilation time)
* Status: success
* Candidate solution
Final objective value: 1.350416e+04
* Found with
Algorithm: Nelder-Mead
* Convergence measures
√(Σ(yᵢ-ȳ)²)/n ≤ 1.0e-08
* Work counters
Seconds run: 2 (vs limit Inf)
Iterations: 48
f(x) calls: 98
theta_est_stat = logit_stat_opt.minimizer;
hessian_stat = ForwardDiff.hessian(
x -> - calculateLogLikelihoodStatic(x, states_matrix, data_gen),
logit_stat_opt.minimizer
);
theta_se_stat = sqrt.(diag(inv(hessian_stat)));DataFrame(theta_est_stat = theta_est_stat, theta_se_stat = theta_se_stat)2×2 DataFrame
| Row | theta_est_stat | theta_se_stat |
|---|---|---|
| Float64 | Float64 | |
| 1 | 0.0428172 | 0.000692099 |
| 2 | 0.00237647 | 1.9178e-5 |
7.4.2 不動点アルゴリズムを用いた、動学的なロジットによる推定
mileage_trans_mat_hat = generateMileageTransition(
kappa_est,
num_mileage_states
);
price_trans_mat_hat = generatePriceTransition(
lambda_est,
num_price_states
);
trans_mat_hat = Array{Float64}(
undef, num_states, num_states, num_choice
);
for i in 1:num_choice
trans_mat_hat[:, :, i] = kron(mileage_trans_mat_hat[:, :, i], price_trans_mat_hat);
end@time NFXP_opt = optimize(
x -> - calculateLogLikelihoodDynamic(x, beta, trans_mat_hat, states_matrix, data_gen),
theta_true,
Optim.Options(show_trace = false)
) 6.368038 seconds (73.23 M allocations: 6.324 GiB, 9.68% gc time, 1.16% compilation time)
* Status: success
* Candidate solution
Final objective value: 1.333722e+04
* Found with
Algorithm: Nelder-Mead
* Convergence measures
√(Σ(yᵢ-ȳ)²)/n ≤ 1.0e-08
* Work counters
Seconds run: 6 (vs limit Inf)
Iterations: 48
f(x) calls: 94
theta_est = NFXP_opt.minimizer;
hessian = ForwardDiff.hessian(
x -> - calculateLogLikelihoodDynamic(x, beta, trans_mat_hat, states_matrix, data_gen),
theta_est
);
theta_se = sqrt.(diag(inv(hessian)));DataFrame(theta_est = theta_est, theta_se = theta_se)2×2 DataFrame
| Row | theta_est | theta_se |
|---|---|---|
| Float64 | Float64 | |
| 1 | 0.00401826 | 8.88336e-5 |
| 2 | 0.00300455 | 3.67953e-5 |
CSV.write("tmp/single_agent_dynamic_basic/data_gen.csv", data_gen);
serialize("tmp/single_agent_dynamic_basic/kappa_est.ser", kappa_est);
serialize("tmp/single_agent_dynamic_basic/lambda_est.ser", lambda_est);
serialize("tmp/single_agent_dynamic_basic/theta_est.ser", theta_est);