7  基礎編

using CSV
using LinearAlgebra
using Random
using StatsBase
using Distributions
using DataFrames
using Plots
using ShiftedArrays
using ForwardDiff
using Optim
using Serialization
for file in readdir("functions/single_agent_dynamic/")
    include("functions/single_agent_dynamic/" * file)
end

7.1 パラメタの設定

theta_true = [0.004, 0.003];

beta = 0.99;
Euler_const = Float64(MathConstants.eulergamma);

num_choice = 2;
price_states = 2000:100:2500;
mileage_states = 0:5:100;
num_price_states = length(price_states);
num_mileage_states = length(mileage_states);
num_states = num_price_states * num_mileage_states;

states_matrix = Matrix(
    reshape(
        reinterpret(
            Int, 
            collect(Iterators.product(price_states, mileage_states))
            ),
        (2, :)
        )'
    );
kappa_true = [0.25, 0.05];

mileage_trans_mat_true = generateMileageTransition(
    kappa_true, 
    num_mileage_states
    );

mileage_trans_mat_true[1:4, 1:4, 1]
4×4 Matrix{Float64}:
 0.7  0.25  0.05  0.0
 0.0  0.7   0.25  0.05
 0.0  0.0   0.7   0.25
 0.0  0.0   0.0   0.7
lambda_true = [
    0.1, 0.2, 0.2, 0.2, 0.2,
    0.1, 0.2, 0.2, 0.2, 0.2,
    0.1, 0.1, 0.2, 0.2, 0.1,
    0.1, 0.1, 0.2, 0.2, 0.1,
    0.05, 0.05, 0.1, 0.1, 0.2,
    0.05, 0.05, 0.1, 0.1, 0.2
];

price_trans_mat_true = generatePriceTransition(
    lambda_true, 
    num_price_states
    );
trans_mat_true = Array{Float64}(
    undef, num_states, num_states, num_choice
    );
for i in 1:num_choice
    trans_mat_true[:, :, i] = kron(
        mileage_trans_mat_true[:, :, i], 
        price_trans_mat_true
        );
end
price_trans_eigen = eigvecs(price_trans_mat_true');
price_dist_steady = (
    price_trans_eigen[:, end] / sum(price_trans_eigen[:, end])
)
6-element Vector{Float64}:
 0.07377049180327863
 0.07377049180327883
 0.1639344262295082
 0.16393442622950818
 0.28571428571428564
 0.23887587822014061
@time EV_true = calculateEV(
    theta_true, beta, trans_mat_true, states_matrix
    );
  0.727990 seconds (2.26 M allocations: 202.622 MiB, 9.86% gc time, 90.73% compilation time)
U_true = calculateFlowUtil(theta_true, states_matrix);
V_CS_true = U_true + beta .* EV_true;
prob_buy_true_mat = reshape(
    exp.(V_CS_true[:, 2]) ./ sum(exp.(V_CS_true), dims = 2), 
    (num_price_states, num_mileage_states)
    );

7.2 データの生成

num_consumer = 1000;

num_period = 12 * 50;
num_period_obs = 12 * 10;

num_obs = num_consumer * num_period;

Random.seed!(42);
data_gen = reduce(vcat, [generateData(
    consumer_id,
    V_CS_true,
    trans_mat_true,
    price_dist_steady
    ) for consumer_id in 1:num_consumer]) |>
    filter(:period => (x -> x > num_period - num_period_obs))

data_gen[!, :price] = states_matrix[data_gen.state, 1];
data_gen[!, :mileage] = states_matrix[data_gen.state, 2];
describe(
    data_gen[:, [:price, :mileage, :action]], :all
    )[:, [:variable, :mean, :std, :min, :max]]
3×5 DataFrame
Rowvariablemeanstdminmax
SymbolFloat64Float64Int64Int64
1price2322.63151.82620002500
2mileage33.019823.69240100
3action0.029150.16822801
histogram(data_gen.price, bar_width = 50, legend=false)
xlabel!("Price (thousand JPY)")
ylabel!("Frequency")
histogram(
    data_gen.mileage, 
    bar_width = 3, 
    legend=false, 
    bins = [collect(mileage_states); [105]] .- 2.5
    )
xlabel!("Mileage (thousand km)")
ylabel!("Frequency")
bar(
    price_states,
    combine(groupby(data_gen, :price), :action => mean).action_mean,
    bar_width = 50, 
    legend=false
)
xlabel!("Price (thousand JPY)")
ylabel!("Purchase probability")
bar(
    mileage_states,
    combine(groupby(data_gen, :mileage), :action => mean).action_mean,
    bar_width = 3, 
    legend=false
)
xlabel!("Mileage (thousand km)")
ylabel!("Purchase probability")

次の図は「観測された条件付き購入確率」を示す。 元のサポートサイトでは3Dプロットを使って示されていたが、Juliaではいい感じの3Dプロットを書くのが難しかったため、代わりにヒートマップで示している。

heatmap(
    mileage_states,
    price_states,
    reshape(
        combine(
            groupby(data_gen, [:mileage, :price]), 
            :action => mean
            ).action_mean, 
        (num_price_states, num_mileage_states)
    ),
    xlabel = "Mileage",
    ylabel = "Price",
    title = "Conditional probability of buying"
)

7.3 遷移行列の推定

transform!(
    groupby(data_gen, :consumer_id), 
    [:price, :mileage, :action] .=> ShiftedArrays.lag
    );
num_cond_obs_mileage = combine(
    groupby(
        transform(
            data_gen |>
                filter(
                    :period => 
                    (x -> x != (num_period - num_period_obs + 1))
                    ),
            [:mileage_lag, :mileage, :action_lag] =>
            ByRow(
                (mileage_lag, mileage, action_lag) ->
                (
                    (
                        (action_lag == 0) & 
                        (5 <= mileage_lag <= 95) & 
                        (mileage_lag == mileage)
                        ) |
                    ((action_lag == 1) & (mileage == 0))
                    ) ? "cond_obs_mileage1" :
                (
                    (
                        (action_lag == 0) & 
                        (5 <= mileage_lag <= 90) & 
                        (mileage_lag == mileage - 5)
                        ) |
                    ((action_lag == 1) & (mileage == 5))
                    ) ? "cond_obs_mileage2" :
                (
                    (
                        (action_lag == 0) & 
                        (5 <= mileage_lag <= 90) & 
                        (mileage_lag == mileage - 10)
                        ) |
                    ((action_lag == 1) & (mileage == 10))
                    ) ? "cond_obs_mileage3" :
                (
                    (
                        (action_lag == 0) & 
                        (mileage_lag == 95) & 
                        (mileage == 100)
                        )
                    ) ? "cond_obs_mileage4" :
                "other"
            ) =>
            :cond_obs_mileage
            ),
        [:cond_obs_mileage]
        ),
    nrow => :num_cond_obs
) |> filter(:cond_obs_mileage => (x -> (x != "other")));

num_cond_obs_mileage = Dict(
    k => v[1, "num_cond_obs"] 
    for ((k, ), v) in pairs(groupby(num_cond_obs_mileage, :cond_obs_mileage))
    );
kappa_est = zeros(2);

kappa_est[1] = num_cond_obs_mileage["cond_obs_mileage2"] * (
    num_cond_obs_mileage["cond_obs_mileage2"] +
    num_cond_obs_mileage["cond_obs_mileage3"] +
    num_cond_obs_mileage["cond_obs_mileage4"]
) / (
    (
        num_cond_obs_mileage["cond_obs_mileage2"] + 
        num_cond_obs_mileage["cond_obs_mileage3"]
        ) * (
    num_cond_obs_mileage["cond_obs_mileage1"] +
    num_cond_obs_mileage["cond_obs_mileage2"] +
    num_cond_obs_mileage["cond_obs_mileage3"] +
    num_cond_obs_mileage["cond_obs_mileage4"]
    )
);

kappa_est[2] = num_cond_obs_mileage["cond_obs_mileage3"] * (
    num_cond_obs_mileage["cond_obs_mileage2"] +
    num_cond_obs_mileage["cond_obs_mileage3"] +
    num_cond_obs_mileage["cond_obs_mileage4"]
) / (
    (
        num_cond_obs_mileage["cond_obs_mileage2"] + 
        num_cond_obs_mileage["cond_obs_mileage3"]
        ) * (
    num_cond_obs_mileage["cond_obs_mileage1"] +
    num_cond_obs_mileage["cond_obs_mileage2"] +
    num_cond_obs_mileage["cond_obs_mileage3"] +
    num_cond_obs_mileage["cond_obs_mileage4"]
    )
);

kappa_est
2-element Vector{Float64}:
 0.2507060588583926
 0.05038686722769128
Infomat_mileage_est = zeros((2, 2));

Infomat_mileage_est[1, 1] = (
    (
        num_cond_obs_mileage["cond_obs_mileage1"] / 
        (1 - kappa_est[1] - kappa_est[2])^2
        ) +
    (num_cond_obs_mileage["cond_obs_mileage2"] / kappa_est[1]^2) +
    (
        num_cond_obs_mileage["cond_obs_mileage4"] / 
        (kappa_est[1] + kappa_est[2])^2
        )
);

Infomat_mileage_est[1, 2] = (
    (
        num_cond_obs_mileage["cond_obs_mileage1"] / 
        (1 - kappa_est[1] - kappa_est[2])^2
        ) +
    (
        num_cond_obs_mileage["cond_obs_mileage4"] / 
        (kappa_est[1] + kappa_est[2])^2
        )
);

Infomat_mileage_est[2, 1] = Infomat_mileage_est[1, 2];

Infomat_mileage_est[2, 2] = (
    (
        num_cond_obs_mileage["cond_obs_mileage1"] / 
        (1 - kappa_est[1] - kappa_est[2])^2
        ) +
    (num_cond_obs_mileage["cond_obs_mileage3"] / kappa_est[2]^2) +
    (
        num_cond_obs_mileage["cond_obs_mileage4"] / 
        (kappa_est[1] + kappa_est[2])^2
        )
);

kappa_se = sqrt.(diag(inv(Infomat_mileage_est)));
DataFrame(kappa_est = kappa_est, kappa_se = kappa_se)
2×2 DataFrame
Rowkappa_estkappa_se
Float64Float64
10.2507060.0013098
20.05038690.000662066
num_cond_obs_price = combine(
    groupby(
        data_gen |>
            filter(
                :period => 
                (x -> x != (num_period - num_period_obs + 1))
                ),
        [:price_lag, :price]
    ),
    nrow => :num_cond_obs
)

num_cond_obs_price = [
    (
        num_cond_obs_price |> 
            filter(
                [:price_lag, :price] => 
                (
                    (price_lag, price) -> 
                    ((price_lag == p_lag) & (price == p))
                    )
                    )
    )[1, :num_cond_obs]
    for p_lag in price_states, p in price_states
]

lambda_est_mat = num_cond_obs_price ./ sum(num_cond_obs_price, dims = 2);
lambda_est = lambda_est_mat'[lambda_est_mat' .!= diag(lambda_est_mat)];
lambda_se = vcat([
    sqrt.(diag(inv(
            (
            diagm(num_cond_obs_price[i, :])[Not(i), Not(i)] ./ 
            lambda_est_mat[Not(i), Not(i)].^2
        ) .+ (
            num_cond_obs_price[i, i] /
            lambda_est_mat[i, i]^2
        )
    ))) for i in 1:num_price_states
]...)
30-element Vector{Float64}:
 0.003331377707369882
 0.00671087202774432
 0.006726990888800794
 0.009561549640256135
 0.009611051522479718
 0.003237157226120388
 0.006683210833495453
 0.006696372126260586
 0.009621285444288651
 0.009781740363963828
 0.002223144478519486
 0.0022780260513659977
 0.004516183865925486
 ⋮
 0.006813746538738526
 0.007677243174949639
 0.002356517399713087
 0.0023843478474719036
 0.0045057964689017155
 0.0045121986971131695
 0.005002569765090251
 0.0026110065055348717
 0.0025918295553672807
 0.004885082439414729
 0.004962738202957978
 0.005515586477784162
DataFrame(
    lambda_est = lambda_est,
    lambda_se = lambda_se
)
30×2 DataFrame
5 rows omitted
Rowlambda_estlambda_se
Float64Float64
10.1012050.00333138
20.1969520.00671087
30.1984310.00672699
40.2052540.00956155
50.1988860.00961105
60.1044590.00323716
70.2000230.00668321
80.2017330.00669637
90.204470.00962129
100.1885050.00978174
110.09983120.00222314
120.09793890.00227803
130.2056460.00451618
190.2004770.00681375
200.1011280.00767724
210.04952970.00235652
220.04982360.00238435
230.10.0045058
240.1009110.0045122
250.1992650.00500257
260.04865840.00261101
270.05096540.00259183
280.1037050.00488508
290.1005110.00496274
300.1981120.00551559

7.4 パラメタの推定

7.4.1 静学的なロジットによる推定

@time logit_stat_opt = optimize(
    x -> - calculateLogLikelihoodStatic(x, states_matrix, data_gen),
    theta_true,
    Optim.Options(show_trace = false)
)
  2.115396 seconds (71.91 M allocations: 1.319 GiB, 6.80% gc time, 21.10% compilation time)
 * Status: success

 * Candidate solution
    Final objective value:     1.350416e+04

 * Found with
    Algorithm:     Nelder-Mead

 * Convergence measures
    √(Σ(yᵢ-ȳ)²)/n ≤ 1.0e-08

 * Work counters
    Seconds run:   2  (vs limit Inf)
    Iterations:    48
    f(x) calls:    98
theta_est_stat = logit_stat_opt.minimizer;

hessian_stat = ForwardDiff.hessian(
    x -> - calculateLogLikelihoodStatic(x, states_matrix, data_gen),
    logit_stat_opt.minimizer
    );

theta_se_stat = sqrt.(diag(inv(hessian_stat)));
DataFrame(theta_est_stat = theta_est_stat, theta_se_stat = theta_se_stat)
2×2 DataFrame
Rowtheta_est_stattheta_se_stat
Float64Float64
10.04281720.000692099
20.002376471.9178e-5

7.4.2 不動点アルゴリズムを用いた、動学的なロジットによる推定

mileage_trans_mat_hat = generateMileageTransition(
    kappa_est, 
    num_mileage_states
    );

price_trans_mat_hat = generatePriceTransition(
    lambda_est, 
    num_price_states
    );

trans_mat_hat = Array{Float64}(
    undef, num_states, num_states, num_choice
    );
for i in 1:num_choice
    trans_mat_hat[:, :, i] = kron(mileage_trans_mat_hat[:, :, i], price_trans_mat_hat);
end
@time NFXP_opt = optimize(
    x -> - calculateLogLikelihoodDynamic(x, beta, trans_mat_hat, states_matrix, data_gen),
    theta_true,
    Optim.Options(show_trace = false)
)
  6.368038 seconds (73.23 M allocations: 6.324 GiB, 9.68% gc time, 1.16% compilation time)
 * Status: success

 * Candidate solution
    Final objective value:     1.333722e+04

 * Found with
    Algorithm:     Nelder-Mead

 * Convergence measures
    √(Σ(yᵢ-ȳ)²)/n ≤ 1.0e-08

 * Work counters
    Seconds run:   6  (vs limit Inf)
    Iterations:    48
    f(x) calls:    94
theta_est = NFXP_opt.minimizer;

hessian = ForwardDiff.hessian(
    x -> - calculateLogLikelihoodDynamic(x, beta, trans_mat_hat, states_matrix, data_gen),
    theta_est
    );

theta_se = sqrt.(diag(inv(hessian)));
DataFrame(theta_est = theta_est, theta_se = theta_se)
2×2 DataFrame
Rowtheta_esttheta_se
Float64Float64
10.004018268.88336e-5
20.003004553.67953e-5
CSV.write("tmp/single_agent_dynamic_basic/data_gen.csv", data_gen);

serialize("tmp/single_agent_dynamic_basic/kappa_est.ser", kappa_est);
serialize("tmp/single_agent_dynamic_basic/lambda_est.ser", lambda_est);
serialize("tmp/single_agent_dynamic_basic/theta_est.ser", theta_est);