2  基礎編 1

using CSV
using DataFrames
using StringEncodings
using FixedEffectModels
using RegressionTables
using Plots
using Printf
using Optim
using GLM

2.1 関数の読み込み

for file in ["addIVColumns.jl", "calculateSales.jl"]
    include("functions/demand_estimation/" * file)
end

2.2 データの準備

2.2.1 データの読み込み

dataCar = CSV.File(
    open(read, "data/demand_estimation/CleanData_20180222.csv", enc"shift-jis"),
    missingstring = ["NA", ""],
    ) |> DataFrame
first(select(dataCar, Not([:base_color, :option_color])), 5)
5×34 DataFrame
RowMakerTypeNameYearSalescommentModelYear_trueMonthpricekudoukataTransMissionWheelBaseMinRideHeightweightcapacitySeatRawNumofDoor種類engine過給器displacementFuelTypeTankCapacityHorsePowerMaxTorqueFuelEfficiencyoverall_lengthoverall_widthoverall_heightinterior_lengthinterior_widthinterior_height
String15String7String31Int64Int64String?StringInt64Int64Float64String3String15String3Int64Int64?Int64Int64Int64Int64String31String15StringInt64String15Int64?Int64?Float64?Float64?Int64Int64Int64Int64?Int64?Int64?
1AudiForeignA1シリーズ20114206missing1.4 TFSI20111289.0FFDBA-8XCAX7AT24651151190423直列4気筒DOHCCAXターボ1389ハイオク4512220.419.4397017401440missingmissingmissing
2AudiForeignA1シリーズ20124502missing1.4 TFSI20121273.0FFDBA-8XCAX7AT24651151190423直列4気筒DOHCCAXターボ1389ハイオク4512220.419.4397017401440missingmissingmissing
3AudiForeignA1シリーズ20135071missing1.4 TFSI20121273.0FFDBA-8XCAX7AT24651151190423直列4気筒DOHCCAXターボ1389ハイオク4512220.419.4397017401440missingmissingmissing
4AudiForeignA3シリーズ20064830missingアトラクション20061284.0FFGH-8PBSE6AT25751401360525直列4気筒SOHCBSEなし1595ハイオク5510215.112.2428517651430missingmissingmissing
5AudiForeignA3シリーズ20073874missingアトラクション20071286.0FFGH-8PBSE6AT25751401360525直列4気筒SOHCBSEなし1595ハイオク5510215.112.2428517651430missingmissingmissing
dataHH = CSV.read("data/demand_estimation/HHsize.csv", DataFrame)
dataHH[!, :HH] = parse.(Int, replace.(dataHH.HH, "," => ""))
first(dataHH, 5)
5×2 DataFrame
RowyearHH
Int64Int64
1197533310006
2197633911052
3197734380314
4197834858696
5197935350173
dataCPI = CSV.File(
    open(read, "data/demand_estimation/zni2015s.csv", enc"shift-jis"), 
    select = 1:2,
    skipto = 7
    ) |> DataFrame
rename!(dataCPI, "類・品目" => "year", "総合" => "CPI")
first(dataCPI, 5)
5×2 DataFrame
RowyearCPI
Int64Float64
1197031.5
2197133.5
3197235.2
4197339.3
5197448.4

2.2.2 データクリーニング

dataCar = dataCar[!, [
        :Maker, :Type, :Name, :Year, :Sales, 
        :Model, :price, :kata, :weight, :FuelEfficiency, 
        :HorsePower, :overall_length, :overall_width, :overall_height
        ]]
rename!(dataCar, "Year" => "year")
data = leftjoin(dataCar, dataHH, on = :year)
data = leftjoin(data, dataCPI, on = :year)
first(data, 5)
5×16 DataFrame
RowMakerTypeNameyearSalesModelpricekataweightFuelEfficiencyHorsePoweroverall_lengthoverall_widthoverall_heightHHCPI
String15String7String31Int64Int64StringFloat64String15Int64Float64?Int64?Int64Int64Int64Int64?Float64?
1AudiForeignA1シリーズ201142061.4 TFSI289.0DBA-8XCAX119019.41223970174014405378343596.3
2AudiForeignA1シリーズ201245021.4 TFSI273.0DBA-8XCAX119019.41223970174014405417147596.2
3AudiForeignA1シリーズ201350711.4 TFSI273.0DBA-8XCAX119019.41223970174014405459474496.6
4AudiForeignA3シリーズ20064830アトラクション284.0GH-8PBSE136012.21024285176514305110200597.2
5AudiForeignA3シリーズ20073874アトラクション286.0GH-8PBSE136012.21024285176514305171304897.2
dropmissing!(data, :FuelEfficiency);
cpi2016 = dataCPI[dataCPI.year .== 2016, "CPI"][1];
data[!, :price] = data.price ./ (data.CPI / cpi2016) / 100;
data[!, :size] = (
    (data[:, :overall_length] / 1000) .* 
    (data[:, :overall_width] / 1000) .* 
    (data[:, :overall_height] / 1000)
);
data[!, :hppw] = data[:, :HorsePower] ./ data[:, :weight];

data[:, :NameID] = groupby(data, :Name).groups;

transform!(
    groupby(data, :year),
    :Sales => sum => :inside_total
);
data[!, :outside_total] = data.HH .- data.inside_total;
data[!, :share] = data.Sales ./ data.HH;
data[!, :share0] = data.outside_total ./ data.HH;

2.2.3 操作変数の構築

transform!(
    groupby(data, [:year, :Maker]),
    (
        [:hppw, :FuelEfficiency, :size] .=> 
        sum .=> 
        [:hppw_sum_own, :FuelEfficiency_sum_own, :size_sum_own]
    ),
    (
        [:hppw, :FuelEfficiency, :size] .=> 
        (x -> sum(x.^2)) .=> 
        [:hppw_sqr_sum_own, :FuelEfficiency_sqr_sum_own, :size_sqr_sum_own]
    ),
    nrow => :group_n
);
transform!(
    groupby(data, [:year]),
    (
        [:hppw, :FuelEfficiency, :size] .=> 
        sum .=> 
        [:hppw_sum_mkt, :FuelEfficiency_sum_mkt, :size_sum_mkt]
    ),
    (
        [:hppw, :FuelEfficiency, :size] .=> 
        (x -> sum(x.^2)) .=> 
        [:hppw_sqr_sum_mkt, :FuelEfficiency_sqr_sum_mkt, :size_sqr_sum_mkt]
    ),
    nrow => :mkt_n
);
for variable in ["hppw", "FuelEfficiency", "size"]
    addIVColumns!(data, variable)
end
CSV.write("tmp/demand_estimation_1/data.csv", data);

2.3 記述統計と基礎的な分析

2.3.1 イントロダクションの日評自動車

NIPPYOautoIDvec = [
    260, 4, 76, 104, 64, 54, 152, 153, 71, 197,
    42, 45, 114, 208, 209, 77, 236, 58, 127, 187,
    79, 175, 19, 117, 216, 112, 256, 119, 37, 158
];
dataNIPPYO = data[in(NIPPYOautoIDvec).(data.NameID), [:Sales, :price, :hppw, :FuelEfficiency, :size]];
dataNIPPYO[!, :log_sales] = log.(dataNIPPYO[:, :Sales]);
dataNIPPYO[!, :log_price] = log.(dataNIPPYO[:, :price]);
dataNIPPYO[!, :log10_sales] = log10.(dataNIPPYO[:, :Sales]);
dataNIPPYO[!, :log10_price] = log10.(dataNIPPYO[:, :price]);
reg(
    dataNIPPYO, 
    @formula(log_sales ~ log_price + hppw + FuelEfficiency + size),
    Vcov.robust()
)
                                 FixedEffectModel                                 
==================================================================================
Number of obs:                       196  Converged:                          true
dof (model):                           4  dof (residuals):                     190
R²:                                0.217  R² adjusted:                       0.201
F-statistic:                     19.1148  P-value:                           0.000
==================================================================================
                  Estimate  Std. Error    t-stat  Pr(>|t|)    Lower 95%  Upper 95%
──────────────────────────────────────────────────────────────────────────────────
log_price       -1.24828     0.309545   -4.03262    <1e-04   -1.85886    -0.63769
hppw            -4.19685     3.97622    -1.05549    0.2925  -12.0401      3.64635
FuelEfficiency   0.0886034   0.0283887   3.12108    0.0021    0.0326059   0.144601
size             0.306896    0.0588927   5.2111     <1e-06    0.190728    0.423063
(Intercept)      5.76211     1.06127     5.42946    <1e-06    3.66873     7.85549
==================================================================================
OLSFitLine = predict(
    lm(@formula(log10_sales ~ log10_price), dataNIPPYO),
    dataNIPPYO,
    interval = :confidence,
    level = 0.90
    );
plot(
    dataNIPPYO.price, 
    dataNIPPYO.Sales, 
    seriestype = :scatter, 
    legend = false,
    xscale = :log10,
    yscale = :log10,
    xlabel = "Price",
    ylabel = "Sales",
    xticks = ([1, 3, 10], [1, 3, 10]),
)
sortedPrediction = OLSFitLine[sortperm(dataNIPPYO[!, :price]), :]
sortedPrice = sort(dataNIPPYO[!, :price])
plot!(
    sortedPrice,
    10 .^ sortedPrediction.prediction,
    ribbon = (
        10 .^ sortedPrediction.prediction - 10 .^ sortedPrediction.lower,
        10 .^ sortedPrediction.upper - 10 .^ sortedPrediction.prediction,
        )
)

2.3.2 記述統計

describe(data[:, [:Sales, :price, :FuelEfficiency, :size, :hppw]])
5×7 DataFrame
Rowvariablemeanminmedianmaxnmissingeltype
SymbolFloat64RealFloat64RealInt64DataType
1Sales24586.4108544.03176750Int64
2price2.530470.7051762.0504212.62650Float64
3FuelEfficiency16.15975.515.440.80Float64
4size11.50535.90911.472519.15480Float64
5hppw0.09913120.0450.09268290.3238640Float64

2.4 ロジットモデルの推定とその応用

data[!, :logit_share] = log.(data[:, :share]) .- log.(data[:, :share0]);
resultOLS = reg(
    data, 
    @formula(logit_share ~ price + hppw + FuelEfficiency + size), 
    Vcov.robust()
    );
resultBLP = reg(
    data, 
    @formula(logit_share ~ (
        price ~ iv_BLP_own_hppw + iv_BLP_own_FuelEfficiency + iv_BLP_own_size + 
            iv_BLP_other_hppw + iv_BLP_other_FuelEfficiency + iv_BLP_other_size
    ) + hppw + FuelEfficiency + size),
    Vcov.robust()
);
resultGH = reg(
    data, 
    @formula(logit_share ~ (
        price ~ iv_GH_own_hppw + iv_GH_own_FuelEfficiency + iv_GH_own_size + 
            iv_GH_other_hppw + iv_GH_other_FuelEfficiency + iv_GH_other_size
    ) + hppw + FuelEfficiency + size),
    Vcov.robust(),
    save = true
);
regtable(resultOLS, resultBLP, resultGH)

--------------------------------------------------------------
                                       logit_share            
                          ------------------------------------
                                 (1)          (2)          (3)
--------------------------------------------------------------
(Intercept)               -12.255***   -12.323***   -12.973***
                             (0.365)      (0.382)      (0.393)
price                      -0.255***    -0.283***    -0.552***
                             (0.026)      (0.067)      (0.080)
hppw                          -0.654        0.213      8.426**
                             (1.284)      (2.298)      (2.638)
FuelEfficiency              0.130***     0.130***     0.127***
                             (0.010)      (0.010)      (0.010)
size                        0.182***     0.187***     0.236***
                             (0.019)      (0.021)      (0.022)
--------------------------------------------------------------
Estimator                        OLS           IV           IV
--------------------------------------------------------------
N                              1,823        1,823        1,823
R2                             0.222        0.222        0.180
Within-R2                                                     
First-stage F statistic                    33.926       51.583
--------------------------------------------------------------

2.4.1 1st stage

resultBLP1st = reg(
    data, 
    @formula(price ~ hppw + FuelEfficiency + size +
            iv_BLP_own_hppw + iv_BLP_own_FuelEfficiency + iv_BLP_own_size + 
            iv_BLP_other_hppw + iv_BLP_other_FuelEfficiency + iv_BLP_other_size
        ),
    Vcov.robust()
);
resultGH1st = reg(
    data, 
    @formula(price ~ hppw + FuelEfficiency + size +
            iv_GH_own_hppw + iv_GH_own_FuelEfficiency + iv_GH_own_size + 
            iv_GH_other_hppw + iv_GH_other_FuelEfficiency + iv_GH_other_size
        ),
    Vcov.robust()
);
regtable(resultBLP1st, resultGH1st)

---------------------------------------------------
                                      price        
                              ---------------------
                                    (1)         (2)
---------------------------------------------------
(Intercept)                      -3.159    -1.325**
                                (1.685)     (0.423)
hppw                          28.749***   23.508***
                                (0.993)     (1.547)
FuelEfficiency                   -0.011   -0.072***
                                (0.008)     (0.014)
size                           0.202***    0.189***
                                (0.015)     (0.020)
iv_BLP_own_hppw                 -0.728*            
                                (0.336)            
iv_BLP_own_FuelEfficiency     -0.007***            
                                (0.001)            
iv_BLP_own_size                0.013***            
                                (0.004)            
iv_BLP_other_hppw                 0.165            
                                (0.255)            
iv_BLP_other_FuelEfficiency     0.001**            
                                (0.000)            
iv_BLP_other_size                -0.002            
                                (0.003)            
iv_GH_own_hppw                            -1.889***
                                            (0.439)
iv_GH_own_FuelEfficiency                      0.000
                                            (0.000)
iv_GH_own_size                            -0.001***
                                            (0.000)
iv_GH_other_hppw                           0.423***
                                            (0.096)
iv_GH_other_FuelEfficiency                 0.000***
                                            (0.000)
iv_GH_other_size                           0.000***
                                            (0.000)
---------------------------------------------------
N                                 1,823       1,823
R2                                0.616       0.623
---------------------------------------------------

2.4.2 自己価格弾力性の計算

data[!, :own_elas_ols] = (
    resultOLS.coef[resultOLS.coefnames .== "price"] .* 
    data[:, :price] .* 
    (1 .- data[:, :share])
);
data[!, :own_elas_ivblp] = (
    resultBLP.coef[resultBLP.coefnames .== "price"] .* 
    data[:, :price] .* 
    (1 .- data[:, :share])
);
data[!, :own_elas_ivgh] = (
    resultGH.coef[resultGH.coefnames .== "price"] .* 
    data[:, :price] .* 
    (1 .- data[:, :share])
);
describe(data[:, r"^own_elas"])
3×7 DataFrame
Rowvariablemeanminmedianmaxnmissingeltype
SymbolFloat64Float64Float64Float64Int64DataType
1own_elas_ols-0.645328-3.22104-0.522324-0.1798920Float64
2own_elas_ivblp-0.717085-3.5792-0.580404-0.1998950Float64
3own_elas_ivgh-1.3967-6.9714-1.13048-0.3893460Float64

2.5 推定結果の応用

2.5.1 需要曲線と収入曲線を書く

data[!, :xi_fit] = resultGH.residuals;
targetNameID = 197
data[
    (data.year .== 2016) .& 
    (data.NameID .== targetNameID), 
    :]
1×54 DataFrame
RowMakerTypeNameyearSalesModelpricekataweightFuelEfficiencyHorsePoweroverall_lengthoverall_widthoverall_heightHHCPIsizehppwNameIDinside_totaloutside_totalshareshare0hppw_sum_ownFuelEfficiency_sum_ownsize_sum_ownhppw_sqr_sum_ownFuelEfficiency_sqr_sum_ownsize_sqr_sum_owngroup_nhppw_sum_mktFuelEfficiency_sum_mktsize_sum_mkthppw_sqr_sum_mktFuelEfficiency_sqr_sum_mktsize_sqr_sum_mktmkt_niv_BLP_own_hppwiv_BLP_other_hppwiv_GH_own_hppwiv_GH_other_hppwiv_BLP_own_FuelEfficiencyiv_BLP_other_FuelEfficiencyiv_GH_own_FuelEfficiencyiv_GH_other_FuelEfficiencyiv_BLP_own_sizeiv_BLP_other_sizeiv_GH_own_sizeiv_GH_other_sizelogit_shareown_elas_olsown_elas_ivblpown_elas_ivghxi_fit
String15String7String31Int64Int64StringFloat64String15Int64Float64Int64?Int64Int64Int64Int64?Float64?Float64Float64Int64Int64Int64Float64Float64Float64Float64Float64Float64Float64Float64Int64Float64Float64Float64Float64Float64Float64Int64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64?
1ToyotaRegularアルファード2016370692.5 X3.198DBA-AGH30W192011.61824915185018805695075799.917.09440.09479171973983817529669400.0006508960.9300483.84238721.0499.7640.38820615287.46520.214016.71283176.11974.41.9052667586.024147.51693.7475912.87040.01917230.236157709.42455.13942.5812698.6482.671474.641122.64907.45-7.26464-0.815288-0.905944-1.764551.16401
priceVec = range(0.3, 5, step = 0.05);
salesVec = calculateSales.(
    priceVec, 2016, targetNameID, Ref(data), Ref(resultGH)
    );
plot(salesVec, priceVec, xticks = [50000, 100000, 150000], legend = false)
xlabel!("Sales")
ylabel!("Price (million JPY)")
plot(priceVec, priceVec .* salesVec / 1000, legend = false)
xlabel!("Price (million JPY)")
ylabel!("Revenue (billion JPY)")

2.5.2 収入を最大化する価格

@time resultMaxSales = optimize(
    x -> - calculateSales(
        x[1], 
        2016, 
        targetNameID, 
        data, 
        resultGH
        ) * x[1],
    [1.0]
);

@printf("Revenue-maximizing price: %.3f \n", resultMaxSales.minimizer[1])
@printf("Maximized revenue : %.3f", -resultMaxSales.minimum)
  0.386725 seconds (1.42 M allocations: 97.367 MiB, 4.09% gc time, 98.43% compilation time)
Revenue-maximizing price: 1.814 
Maximized revenue : 144273.836