using CSV
using DataFrames
using StringEncodings
using FixedEffectModels
using RegressionTables
using Plots
using Printf
using Optim
using GLM2 基礎編 1
2.1 関数の読み込み
for file in ["addIVColumns.jl", "calculateSales.jl"]
include("functions/demand_estimation/" * file)
end2.2 データの準備
2.2.1 データの読み込み
dataCar = CSV.File(
open(read, "data/demand_estimation/CleanData_20180222.csv", enc"shift-jis"),
missingstring = ["NA", ""],
) |> DataFrame
first(select(dataCar, Not([:base_color, :option_color])), 5)5×34 DataFrame
| Row | Maker | Type | Name | Year | Sales | comment | Model | Year_true | Month | price | kudou | kata | TransMission | WheelBase | MinRideHeight | weight | capacity | SeatRaw | NumofDoor | 種類 | engine | 過給器 | displacement | FuelType | TankCapacity | HorsePower | MaxTorque | FuelEfficiency | overall_length | overall_width | overall_height | interior_length | interior_width | interior_height |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| String15 | String7 | String31 | Int64 | Int64 | String? | String | Int64 | Int64 | Float64 | String3 | String15 | String3 | Int64 | Int64? | Int64 | Int64 | Int64 | Int64 | String31 | String15 | String | Int64 | String15 | Int64? | Int64? | Float64? | Float64? | Int64 | Int64 | Int64 | Int64? | Int64? | Int64? | |
| 1 | Audi | Foreign | A1シリーズ | 2011 | 4206 | missing | 1.4 TFSI | 2011 | 1 | 289.0 | FF | DBA-8XCAX | 7AT | 2465 | 115 | 1190 | 4 | 2 | 3 | 直列4気筒DOHC | CAX | ターボ | 1389 | ハイオク | 45 | 122 | 20.4 | 19.4 | 3970 | 1740 | 1440 | missing | missing | missing |
| 2 | Audi | Foreign | A1シリーズ | 2012 | 4502 | missing | 1.4 TFSI | 2012 | 1 | 273.0 | FF | DBA-8XCAX | 7AT | 2465 | 115 | 1190 | 4 | 2 | 3 | 直列4気筒DOHC | CAX | ターボ | 1389 | ハイオク | 45 | 122 | 20.4 | 19.4 | 3970 | 1740 | 1440 | missing | missing | missing |
| 3 | Audi | Foreign | A1シリーズ | 2013 | 5071 | missing | 1.4 TFSI | 2012 | 1 | 273.0 | FF | DBA-8XCAX | 7AT | 2465 | 115 | 1190 | 4 | 2 | 3 | 直列4気筒DOHC | CAX | ターボ | 1389 | ハイオク | 45 | 122 | 20.4 | 19.4 | 3970 | 1740 | 1440 | missing | missing | missing |
| 4 | Audi | Foreign | A3シリーズ | 2006 | 4830 | missing | アトラクション | 2006 | 1 | 284.0 | FF | GH-8PBSE | 6AT | 2575 | 140 | 1360 | 5 | 2 | 5 | 直列4気筒SOHC | BSE | なし | 1595 | ハイオク | 55 | 102 | 15.1 | 12.2 | 4285 | 1765 | 1430 | missing | missing | missing |
| 5 | Audi | Foreign | A3シリーズ | 2007 | 3874 | missing | アトラクション | 2007 | 1 | 286.0 | FF | GH-8PBSE | 6AT | 2575 | 140 | 1360 | 5 | 2 | 5 | 直列4気筒SOHC | BSE | なし | 1595 | ハイオク | 55 | 102 | 15.1 | 12.2 | 4285 | 1765 | 1430 | missing | missing | missing |
dataHH = CSV.read("data/demand_estimation/HHsize.csv", DataFrame)
dataHH[!, :HH] = parse.(Int, replace.(dataHH.HH, "," => ""))
first(dataHH, 5)5×2 DataFrame
| Row | year | HH |
|---|---|---|
| Int64 | Int64 | |
| 1 | 1975 | 33310006 |
| 2 | 1976 | 33911052 |
| 3 | 1977 | 34380314 |
| 4 | 1978 | 34858696 |
| 5 | 1979 | 35350173 |
dataCPI = CSV.File(
open(read, "data/demand_estimation/zni2015s.csv", enc"shift-jis"),
select = 1:2,
skipto = 7
) |> DataFrame
rename!(dataCPI, "類・品目" => "year", "総合" => "CPI")
first(dataCPI, 5)5×2 DataFrame
| Row | year | CPI |
|---|---|---|
| Int64 | Float64 | |
| 1 | 1970 | 31.5 |
| 2 | 1971 | 33.5 |
| 3 | 1972 | 35.2 |
| 4 | 1973 | 39.3 |
| 5 | 1974 | 48.4 |
2.2.2 データクリーニング
dataCar = dataCar[!, [
:Maker, :Type, :Name, :Year, :Sales,
:Model, :price, :kata, :weight, :FuelEfficiency,
:HorsePower, :overall_length, :overall_width, :overall_height
]]
rename!(dataCar, "Year" => "year")
data = leftjoin(dataCar, dataHH, on = :year)
data = leftjoin(data, dataCPI, on = :year)
first(data, 5)5×16 DataFrame
| Row | Maker | Type | Name | year | Sales | Model | price | kata | weight | FuelEfficiency | HorsePower | overall_length | overall_width | overall_height | HH | CPI |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| String15 | String7 | String31 | Int64 | Int64 | String | Float64 | String15 | Int64 | Float64? | Int64? | Int64 | Int64 | Int64 | Int64? | Float64? | |
| 1 | Audi | Foreign | A1シリーズ | 2011 | 4206 | 1.4 TFSI | 289.0 | DBA-8XCAX | 1190 | 19.4 | 122 | 3970 | 1740 | 1440 | 53783435 | 96.3 |
| 2 | Audi | Foreign | A1シリーズ | 2012 | 4502 | 1.4 TFSI | 273.0 | DBA-8XCAX | 1190 | 19.4 | 122 | 3970 | 1740 | 1440 | 54171475 | 96.2 |
| 3 | Audi | Foreign | A1シリーズ | 2013 | 5071 | 1.4 TFSI | 273.0 | DBA-8XCAX | 1190 | 19.4 | 122 | 3970 | 1740 | 1440 | 54594744 | 96.6 |
| 4 | Audi | Foreign | A3シリーズ | 2006 | 4830 | アトラクション | 284.0 | GH-8PBSE | 1360 | 12.2 | 102 | 4285 | 1765 | 1430 | 51102005 | 97.2 |
| 5 | Audi | Foreign | A3シリーズ | 2007 | 3874 | アトラクション | 286.0 | GH-8PBSE | 1360 | 12.2 | 102 | 4285 | 1765 | 1430 | 51713048 | 97.2 |
dropmissing!(data, :FuelEfficiency);cpi2016 = dataCPI[dataCPI.year .== 2016, "CPI"][1];
data[!, :price] = data.price ./ (data.CPI / cpi2016) / 100;data[!, :size] = (
(data[:, :overall_length] / 1000) .*
(data[:, :overall_width] / 1000) .*
(data[:, :overall_height] / 1000)
);
data[!, :hppw] = data[:, :HorsePower] ./ data[:, :weight];
data[:, :NameID] = groupby(data, :Name).groups;
transform!(
groupby(data, :year),
:Sales => sum => :inside_total
);
data[!, :outside_total] = data.HH .- data.inside_total;
data[!, :share] = data.Sales ./ data.HH;
data[!, :share0] = data.outside_total ./ data.HH;2.2.3 操作変数の構築
transform!(
groupby(data, [:year, :Maker]),
(
[:hppw, :FuelEfficiency, :size] .=>
sum .=>
[:hppw_sum_own, :FuelEfficiency_sum_own, :size_sum_own]
),
(
[:hppw, :FuelEfficiency, :size] .=>
(x -> sum(x.^2)) .=>
[:hppw_sqr_sum_own, :FuelEfficiency_sqr_sum_own, :size_sqr_sum_own]
),
nrow => :group_n
);
transform!(
groupby(data, [:year]),
(
[:hppw, :FuelEfficiency, :size] .=>
sum .=>
[:hppw_sum_mkt, :FuelEfficiency_sum_mkt, :size_sum_mkt]
),
(
[:hppw, :FuelEfficiency, :size] .=>
(x -> sum(x.^2)) .=>
[:hppw_sqr_sum_mkt, :FuelEfficiency_sqr_sum_mkt, :size_sqr_sum_mkt]
),
nrow => :mkt_n
);for variable in ["hppw", "FuelEfficiency", "size"]
addIVColumns!(data, variable)
endCSV.write("tmp/demand_estimation_1/data.csv", data);2.3 記述統計と基礎的な分析
2.3.1 イントロダクションの日評自動車
NIPPYOautoIDvec = [
260, 4, 76, 104, 64, 54, 152, 153, 71, 197,
42, 45, 114, 208, 209, 77, 236, 58, 127, 187,
79, 175, 19, 117, 216, 112, 256, 119, 37, 158
];dataNIPPYO = data[in(NIPPYOautoIDvec).(data.NameID), [:Sales, :price, :hppw, :FuelEfficiency, :size]];
dataNIPPYO[!, :log_sales] = log.(dataNIPPYO[:, :Sales]);
dataNIPPYO[!, :log_price] = log.(dataNIPPYO[:, :price]);
dataNIPPYO[!, :log10_sales] = log10.(dataNIPPYO[:, :Sales]);
dataNIPPYO[!, :log10_price] = log10.(dataNIPPYO[:, :price]);reg(
dataNIPPYO,
@formula(log_sales ~ log_price + hppw + FuelEfficiency + size),
Vcov.robust()
) FixedEffectModel
==================================================================================
Number of obs: 196 Converged: true
dof (model): 4 dof (residuals): 190
R²: 0.217 R² adjusted: 0.201
F-statistic: 19.1148 P-value: 0.000
==================================================================================
Estimate Std. Error t-stat Pr(>|t|) Lower 95% Upper 95%
──────────────────────────────────────────────────────────────────────────────────
log_price -1.24828 0.309545 -4.03262 <1e-04 -1.85886 -0.63769
hppw -4.19685 3.97622 -1.05549 0.2925 -12.0401 3.64635
FuelEfficiency 0.0886034 0.0283887 3.12108 0.0021 0.0326059 0.144601
size 0.306896 0.0588927 5.2111 <1e-06 0.190728 0.423063
(Intercept) 5.76211 1.06127 5.42946 <1e-06 3.66873 7.85549
==================================================================================
OLSFitLine = predict(
lm(@formula(log10_sales ~ log10_price), dataNIPPYO),
dataNIPPYO,
interval = :confidence,
level = 0.90
);
plot(
dataNIPPYO.price,
dataNIPPYO.Sales,
seriestype = :scatter,
legend = false,
xscale = :log10,
yscale = :log10,
xlabel = "Price",
ylabel = "Sales",
xticks = ([1, 3, 10], [1, 3, 10]),
)
sortedPrediction = OLSFitLine[sortperm(dataNIPPYO[!, :price]), :]
sortedPrice = sort(dataNIPPYO[!, :price])
plot!(
sortedPrice,
10 .^ sortedPrediction.prediction,
ribbon = (
10 .^ sortedPrediction.prediction - 10 .^ sortedPrediction.lower,
10 .^ sortedPrediction.upper - 10 .^ sortedPrediction.prediction,
)
)2.3.2 記述統計
describe(data[:, [:Sales, :price, :FuelEfficiency, :size, :hppw]])5×7 DataFrame
| Row | variable | mean | min | median | max | nmissing | eltype |
|---|---|---|---|---|---|---|---|
| Symbol | Float64 | Real | Float64 | Real | Int64 | DataType | |
| 1 | Sales | 24586.4 | 10 | 8544.0 | 317675 | 0 | Int64 |
| 2 | price | 2.53047 | 0.705176 | 2.05042 | 12.6265 | 0 | Float64 |
| 3 | FuelEfficiency | 16.1597 | 5.5 | 15.4 | 40.8 | 0 | Float64 |
| 4 | size | 11.5053 | 5.909 | 11.4725 | 19.1548 | 0 | Float64 |
| 5 | hppw | 0.0991312 | 0.045 | 0.0926829 | 0.323864 | 0 | Float64 |
2.4 ロジットモデルの推定とその応用
data[!, :logit_share] = log.(data[:, :share]) .- log.(data[:, :share0]);resultOLS = reg(
data,
@formula(logit_share ~ price + hppw + FuelEfficiency + size),
Vcov.robust()
);
resultBLP = reg(
data,
@formula(logit_share ~ (
price ~ iv_BLP_own_hppw + iv_BLP_own_FuelEfficiency + iv_BLP_own_size +
iv_BLP_other_hppw + iv_BLP_other_FuelEfficiency + iv_BLP_other_size
) + hppw + FuelEfficiency + size),
Vcov.robust()
);
resultGH = reg(
data,
@formula(logit_share ~ (
price ~ iv_GH_own_hppw + iv_GH_own_FuelEfficiency + iv_GH_own_size +
iv_GH_other_hppw + iv_GH_other_FuelEfficiency + iv_GH_other_size
) + hppw + FuelEfficiency + size),
Vcov.robust(),
save = true
);regtable(resultOLS, resultBLP, resultGH)
--------------------------------------------------------------
logit_share
------------------------------------
(1) (2) (3)
--------------------------------------------------------------
(Intercept) -12.255*** -12.323*** -12.973***
(0.365) (0.382) (0.393)
price -0.255*** -0.283*** -0.552***
(0.026) (0.067) (0.080)
hppw -0.654 0.213 8.426**
(1.284) (2.298) (2.638)
FuelEfficiency 0.130*** 0.130*** 0.127***
(0.010) (0.010) (0.010)
size 0.182*** 0.187*** 0.236***
(0.019) (0.021) (0.022)
--------------------------------------------------------------
Estimator OLS IV IV
--------------------------------------------------------------
N 1,823 1,823 1,823
R2 0.222 0.222 0.180
Within-R2
First-stage F statistic 33.926 51.583
--------------------------------------------------------------
2.4.1 1st stage
resultBLP1st = reg(
data,
@formula(price ~ hppw + FuelEfficiency + size +
iv_BLP_own_hppw + iv_BLP_own_FuelEfficiency + iv_BLP_own_size +
iv_BLP_other_hppw + iv_BLP_other_FuelEfficiency + iv_BLP_other_size
),
Vcov.robust()
);
resultGH1st = reg(
data,
@formula(price ~ hppw + FuelEfficiency + size +
iv_GH_own_hppw + iv_GH_own_FuelEfficiency + iv_GH_own_size +
iv_GH_other_hppw + iv_GH_other_FuelEfficiency + iv_GH_other_size
),
Vcov.robust()
);regtable(resultBLP1st, resultGH1st)
---------------------------------------------------
price
---------------------
(1) (2)
---------------------------------------------------
(Intercept) -3.159 -1.325**
(1.685) (0.423)
hppw 28.749*** 23.508***
(0.993) (1.547)
FuelEfficiency -0.011 -0.072***
(0.008) (0.014)
size 0.202*** 0.189***
(0.015) (0.020)
iv_BLP_own_hppw -0.728*
(0.336)
iv_BLP_own_FuelEfficiency -0.007***
(0.001)
iv_BLP_own_size 0.013***
(0.004)
iv_BLP_other_hppw 0.165
(0.255)
iv_BLP_other_FuelEfficiency 0.001**
(0.000)
iv_BLP_other_size -0.002
(0.003)
iv_GH_own_hppw -1.889***
(0.439)
iv_GH_own_FuelEfficiency 0.000
(0.000)
iv_GH_own_size -0.001***
(0.000)
iv_GH_other_hppw 0.423***
(0.096)
iv_GH_other_FuelEfficiency 0.000***
(0.000)
iv_GH_other_size 0.000***
(0.000)
---------------------------------------------------
N 1,823 1,823
R2 0.616 0.623
---------------------------------------------------
2.4.2 自己価格弾力性の計算
data[!, :own_elas_ols] = (
resultOLS.coef[resultOLS.coefnames .== "price"] .*
data[:, :price] .*
(1 .- data[:, :share])
);
data[!, :own_elas_ivblp] = (
resultBLP.coef[resultBLP.coefnames .== "price"] .*
data[:, :price] .*
(1 .- data[:, :share])
);
data[!, :own_elas_ivgh] = (
resultGH.coef[resultGH.coefnames .== "price"] .*
data[:, :price] .*
(1 .- data[:, :share])
);describe(data[:, r"^own_elas"])3×7 DataFrame
| Row | variable | mean | min | median | max | nmissing | eltype |
|---|---|---|---|---|---|---|---|
| Symbol | Float64 | Float64 | Float64 | Float64 | Int64 | DataType | |
| 1 | own_elas_ols | -0.645328 | -3.22104 | -0.522324 | -0.179892 | 0 | Float64 |
| 2 | own_elas_ivblp | -0.717085 | -3.5792 | -0.580404 | -0.199895 | 0 | Float64 |
| 3 | own_elas_ivgh | -1.3967 | -6.9714 | -1.13048 | -0.389346 | 0 | Float64 |
2.5 推定結果の応用
2.5.1 需要曲線と収入曲線を書く
data[!, :xi_fit] = resultGH.residuals;targetNameID = 197
data[
(data.year .== 2016) .&
(data.NameID .== targetNameID),
:]1×54 DataFrame
| Row | Maker | Type | Name | year | Sales | Model | price | kata | weight | FuelEfficiency | HorsePower | overall_length | overall_width | overall_height | HH | CPI | size | hppw | NameID | inside_total | outside_total | share | share0 | hppw_sum_own | FuelEfficiency_sum_own | size_sum_own | hppw_sqr_sum_own | FuelEfficiency_sqr_sum_own | size_sqr_sum_own | group_n | hppw_sum_mkt | FuelEfficiency_sum_mkt | size_sum_mkt | hppw_sqr_sum_mkt | FuelEfficiency_sqr_sum_mkt | size_sqr_sum_mkt | mkt_n | iv_BLP_own_hppw | iv_BLP_other_hppw | iv_GH_own_hppw | iv_GH_other_hppw | iv_BLP_own_FuelEfficiency | iv_BLP_other_FuelEfficiency | iv_GH_own_FuelEfficiency | iv_GH_other_FuelEfficiency | iv_BLP_own_size | iv_BLP_other_size | iv_GH_own_size | iv_GH_other_size | logit_share | own_elas_ols | own_elas_ivblp | own_elas_ivgh | xi_fit |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| String15 | String7 | String31 | Int64 | Int64 | String | Float64 | String15 | Int64 | Float64 | Int64? | Int64 | Int64 | Int64 | Int64? | Float64? | Float64 | Float64 | Int64 | Int64 | Int64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Int64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Int64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64? | |
| 1 | Toyota | Regular | アルファード | 2016 | 37069 | 2.5 X | 3.198 | DBA-AGH30W | 1920 | 11.6 | 182 | 4915 | 1850 | 1880 | 56950757 | 99.9 | 17.0944 | 0.0947917 | 197 | 3983817 | 52966940 | 0.000650896 | 0.930048 | 3.84238 | 721.0 | 499.764 | 0.388206 | 15287.4 | 6520.21 | 40 | 16.7128 | 3176.1 | 1974.4 | 1.90526 | 67586.0 | 24147.5 | 169 | 3.74759 | 12.8704 | 0.0191723 | 0.236157 | 709.4 | 2455.1 | 3942.58 | 12698.6 | 482.67 | 1474.64 | 1122.6 | 4907.45 | -7.26464 | -0.815288 | -0.905944 | -1.76455 | 1.16401 |
priceVec = range(0.3, 5, step = 0.05);
salesVec = calculateSales.(
priceVec, 2016, targetNameID, Ref(data), Ref(resultGH)
);plot(salesVec, priceVec, xticks = [50000, 100000, 150000], legend = false)
xlabel!("Sales")
ylabel!("Price (million JPY)")plot(priceVec, priceVec .* salesVec / 1000, legend = false)
xlabel!("Price (million JPY)")
ylabel!("Revenue (billion JPY)")2.5.2 収入を最大化する価格
@time resultMaxSales = optimize(
x -> - calculateSales(
x[1],
2016,
targetNameID,
data,
resultGH
) * x[1],
[1.0]
);
@printf("Revenue-maximizing price: %.3f \n", resultMaxSales.minimizer[1])
@printf("Maximized revenue : %.3f", -resultMaxSales.minimum) 0.386725 seconds (1.42 M allocations: 97.367 MiB, 4.09% gc time, 98.43% compilation time)
Revenue-maximizing price: 1.814
Maximized revenue : 144273.836