using CSV
using DataFrames
using StringEncodings
using FixedEffectModels
using RegressionTables
using Plots
using Printf
using Optim
using GLM
2 基礎編 1
2.1 関数の読み込み
for file in ["addIVColumns.jl", "calculateSales.jl"]
include("functions/demand_estimation/" * file)
end
2.2 データの準備
2.2.1 データの読み込み
= CSV.File(
dataCar open(read, "data/demand_estimation/CleanData_20180222.csv", enc"shift-jis"),
= ["NA", ""],
missingstring |> DataFrame
) first(select(dataCar, Not([:base_color, :option_color])), 5)
5×34 DataFrame
Row | Maker | Type | Name | Year | Sales | comment | Model | Year_true | Month | price | kudou | kata | TransMission | WheelBase | MinRideHeight | weight | capacity | SeatRaw | NumofDoor | 種類 | engine | 過給器 | displacement | FuelType | TankCapacity | HorsePower | MaxTorque | FuelEfficiency | overall_length | overall_width | overall_height | interior_length | interior_width | interior_height |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
String15 | String7 | String31 | Int64 | Int64 | String? | String | Int64 | Int64 | Float64 | String3 | String15 | String3 | Int64 | Int64? | Int64 | Int64 | Int64 | Int64 | String31 | String15 | String | Int64 | String15 | Int64? | Int64? | Float64? | Float64? | Int64 | Int64 | Int64 | Int64? | Int64? | Int64? | |
1 | Audi | Foreign | A1シリーズ | 2011 | 4206 | missing | 1.4 TFSI | 2011 | 1 | 289.0 | FF | DBA-8XCAX | 7AT | 2465 | 115 | 1190 | 4 | 2 | 3 | 直列4気筒DOHC | CAX | ターボ | 1389 | ハイオク | 45 | 122 | 20.4 | 19.4 | 3970 | 1740 | 1440 | missing | missing | missing |
2 | Audi | Foreign | A1シリーズ | 2012 | 4502 | missing | 1.4 TFSI | 2012 | 1 | 273.0 | FF | DBA-8XCAX | 7AT | 2465 | 115 | 1190 | 4 | 2 | 3 | 直列4気筒DOHC | CAX | ターボ | 1389 | ハイオク | 45 | 122 | 20.4 | 19.4 | 3970 | 1740 | 1440 | missing | missing | missing |
3 | Audi | Foreign | A1シリーズ | 2013 | 5071 | missing | 1.4 TFSI | 2012 | 1 | 273.0 | FF | DBA-8XCAX | 7AT | 2465 | 115 | 1190 | 4 | 2 | 3 | 直列4気筒DOHC | CAX | ターボ | 1389 | ハイオク | 45 | 122 | 20.4 | 19.4 | 3970 | 1740 | 1440 | missing | missing | missing |
4 | Audi | Foreign | A3シリーズ | 2006 | 4830 | missing | アトラクション | 2006 | 1 | 284.0 | FF | GH-8PBSE | 6AT | 2575 | 140 | 1360 | 5 | 2 | 5 | 直列4気筒SOHC | BSE | なし | 1595 | ハイオク | 55 | 102 | 15.1 | 12.2 | 4285 | 1765 | 1430 | missing | missing | missing |
5 | Audi | Foreign | A3シリーズ | 2007 | 3874 | missing | アトラクション | 2007 | 1 | 286.0 | FF | GH-8PBSE | 6AT | 2575 | 140 | 1360 | 5 | 2 | 5 | 直列4気筒SOHC | BSE | なし | 1595 | ハイオク | 55 | 102 | 15.1 | 12.2 | 4285 | 1765 | 1430 | missing | missing | missing |
= CSV.read("data/demand_estimation/HHsize.csv", DataFrame)
dataHH :HH] = parse.(Int, replace.(dataHH.HH, "," => ""))
dataHH[!, first(dataHH, 5)
5×2 DataFrame
Row | year | HH |
---|---|---|
Int64 | Int64 | |
1 | 1975 | 33310006 |
2 | 1976 | 33911052 |
3 | 1977 | 34380314 |
4 | 1978 | 34858696 |
5 | 1979 | 35350173 |
= CSV.File(
dataCPI open(read, "data/demand_estimation/zni2015s.csv", enc"shift-jis"),
= 1:2,
select = 7
skipto |> DataFrame
) rename!(dataCPI, "類・品目" => "year", "総合" => "CPI")
first(dataCPI, 5)
5×2 DataFrame
Row | year | CPI |
---|---|---|
Int64 | Float64 | |
1 | 1970 | 31.5 |
2 | 1971 | 33.5 |
3 | 1972 | 35.2 |
4 | 1973 | 39.3 |
5 | 1974 | 48.4 |
2.2.2 データクリーニング
= dataCar[!, [
dataCar :Maker, :Type, :Name, :Year, :Sales,
:Model, :price, :kata, :weight, :FuelEfficiency,
:HorsePower, :overall_length, :overall_width, :overall_height
]]rename!(dataCar, "Year" => "year")
= leftjoin(dataCar, dataHH, on = :year)
data = leftjoin(data, dataCPI, on = :year)
data first(data, 5)
5×16 DataFrame
Row | Maker | Type | Name | year | Sales | Model | price | kata | weight | FuelEfficiency | HorsePower | overall_length | overall_width | overall_height | HH | CPI |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
String15 | String7 | String31 | Int64 | Int64 | String | Float64 | String15 | Int64 | Float64? | Int64? | Int64 | Int64 | Int64 | Int64? | Float64? | |
1 | Audi | Foreign | A1シリーズ | 2011 | 4206 | 1.4 TFSI | 289.0 | DBA-8XCAX | 1190 | 19.4 | 122 | 3970 | 1740 | 1440 | 53783435 | 96.3 |
2 | Audi | Foreign | A1シリーズ | 2012 | 4502 | 1.4 TFSI | 273.0 | DBA-8XCAX | 1190 | 19.4 | 122 | 3970 | 1740 | 1440 | 54171475 | 96.2 |
3 | Audi | Foreign | A1シリーズ | 2013 | 5071 | 1.4 TFSI | 273.0 | DBA-8XCAX | 1190 | 19.4 | 122 | 3970 | 1740 | 1440 | 54594744 | 96.6 |
4 | Audi | Foreign | A3シリーズ | 2006 | 4830 | アトラクション | 284.0 | GH-8PBSE | 1360 | 12.2 | 102 | 4285 | 1765 | 1430 | 51102005 | 97.2 |
5 | Audi | Foreign | A3シリーズ | 2007 | 3874 | アトラクション | 286.0 | GH-8PBSE | 1360 | 12.2 | 102 | 4285 | 1765 | 1430 | 51713048 | 97.2 |
dropmissing!(data, :FuelEfficiency);
= dataCPI[dataCPI.year .== 2016, "CPI"][1];
cpi2016 :price] = data.price ./ (data.CPI / cpi2016) / 100; data[!,
:size] = (
data[!, :, :overall_length] / 1000) .*
(data[:, :overall_width] / 1000) .*
(data[:, :overall_height] / 1000)
(data[
);:hppw] = data[:, :HorsePower] ./ data[:, :weight];
data[!,
:, :NameID] = groupby(data, :Name).groups;
data[
transform!(
groupby(data, :year),
:Sales => sum => :inside_total
);:outside_total] = data.HH .- data.inside_total;
data[!, :share] = data.Sales ./ data.HH;
data[!, :share0] = data.outside_total ./ data.HH; data[!,
2.2.3 操作変数の構築
transform!(
groupby(data, [:year, :Maker]),
(:hppw, :FuelEfficiency, :size] .=>
[.=>
sum :hppw_sum_own, :FuelEfficiency_sum_own, :size_sum_own]
[
),
(:hppw, :FuelEfficiency, :size] .=>
[-> sum(x.^2)) .=>
(x :hppw_sqr_sum_own, :FuelEfficiency_sqr_sum_own, :size_sqr_sum_own]
[
),=> :group_n
nrow
);transform!(
groupby(data, [:year]),
(:hppw, :FuelEfficiency, :size] .=>
[.=>
sum :hppw_sum_mkt, :FuelEfficiency_sum_mkt, :size_sum_mkt]
[
),
(:hppw, :FuelEfficiency, :size] .=>
[-> sum(x.^2)) .=>
(x :hppw_sqr_sum_mkt, :FuelEfficiency_sqr_sum_mkt, :size_sqr_sum_mkt]
[
),=> :mkt_n
nrow );
for variable in ["hppw", "FuelEfficiency", "size"]
addIVColumns!(data, variable)
end
write("tmp/demand_estimation_1/data.csv", data); CSV.
2.3 記述統計と基礎的な分析
2.3.1 イントロダクションの日評自動車
= [
NIPPYOautoIDvec 260, 4, 76, 104, 64, 54, 152, 153, 71, 197,
42, 45, 114, 208, 209, 77, 236, 58, 127, 187,
79, 175, 19, 117, 216, 112, 256, 119, 37, 158
];
= data[in(NIPPYOautoIDvec).(data.NameID), [:Sales, :price, :hppw, :FuelEfficiency, :size]];
dataNIPPYO :log_sales] = log.(dataNIPPYO[:, :Sales]);
dataNIPPYO[!, :log_price] = log.(dataNIPPYO[:, :price]);
dataNIPPYO[!, :log10_sales] = log10.(dataNIPPYO[:, :Sales]);
dataNIPPYO[!, :log10_price] = log10.(dataNIPPYO[:, :price]); dataNIPPYO[!,
reg(
dataNIPPYO, @formula(log_sales ~ log_price + hppw + FuelEfficiency + size),
robust()
Vcov. )
FixedEffectModel
==================================================================================
Number of obs: 196 Converged: true
dof (model): 4 dof (residuals): 190
R²: 0.217 R² adjusted: 0.201
F-statistic: 19.1148 P-value: 0.000
==================================================================================
Estimate Std. Error t-stat Pr(>|t|) Lower 95% Upper 95%
──────────────────────────────────────────────────────────────────────────────────
log_price -1.24828 0.309545 -4.03262 <1e-04 -1.85886 -0.63769
hppw -4.19685 3.97622 -1.05549 0.2925 -12.0401 3.64635
FuelEfficiency 0.0886034 0.0283887 3.12108 0.0021 0.0326059 0.144601
size 0.306896 0.0588927 5.2111 <1e-06 0.190728 0.423063
(Intercept) 5.76211 1.06127 5.42946 <1e-06 3.66873 7.85549
==================================================================================
= predict(
OLSFitLine lm(@formula(log10_sales ~ log10_price), dataNIPPYO),
dataNIPPYO,= :confidence,
interval = 0.90
level
);plot(
dataNIPPYO.price,
dataNIPPYO.Sales, = :scatter,
seriestype = false,
legend = :log10,
xscale = :log10,
yscale = "Price",
xlabel = "Sales",
ylabel = ([1, 3, 10], [1, 3, 10]),
xticks
)= OLSFitLine[sortperm(dataNIPPYO[!, :price]), :]
sortedPrediction = sort(dataNIPPYO[!, :price])
sortedPrice plot!(
sortedPrice,10 .^ sortedPrediction.prediction,
= (
ribbon 10 .^ sortedPrediction.prediction - 10 .^ sortedPrediction.lower,
10 .^ sortedPrediction.upper - 10 .^ sortedPrediction.prediction,
) )
2.3.2 記述統計
describe(data[:, [:Sales, :price, :FuelEfficiency, :size, :hppw]])
5×7 DataFrame
Row | variable | mean | min | median | max | nmissing | eltype |
---|---|---|---|---|---|---|---|
Symbol | Float64 | Real | Float64 | Real | Int64 | DataType | |
1 | Sales | 24586.4 | 10 | 8544.0 | 317675 | 0 | Int64 |
2 | price | 2.53047 | 0.705176 | 2.05042 | 12.6265 | 0 | Float64 |
3 | FuelEfficiency | 16.1597 | 5.5 | 15.4 | 40.8 | 0 | Float64 |
4 | size | 11.5053 | 5.909 | 11.4725 | 19.1548 | 0 | Float64 |
5 | hppw | 0.0991312 | 0.045 | 0.0926829 | 0.323864 | 0 | Float64 |
2.4 ロジットモデルの推定とその応用
:logit_share] = log.(data[:, :share]) .- log.(data[:, :share0]); data[!,
= reg(
resultOLS
data, @formula(logit_share ~ price + hppw + FuelEfficiency + size),
robust()
Vcov.
);= reg(
resultBLP
data, @formula(logit_share ~ (
~ iv_BLP_own_hppw + iv_BLP_own_FuelEfficiency + iv_BLP_own_size +
price + iv_BLP_other_FuelEfficiency + iv_BLP_other_size
iv_BLP_other_hppw + hppw + FuelEfficiency + size),
) robust()
Vcov.
);= reg(
resultGH
data, @formula(logit_share ~ (
~ iv_GH_own_hppw + iv_GH_own_FuelEfficiency + iv_GH_own_size +
price + iv_GH_other_FuelEfficiency + iv_GH_other_size
iv_GH_other_hppw + hppw + FuelEfficiency + size),
) robust(),
Vcov.= true
save );
regtable(resultOLS, resultBLP, resultGH)
--------------------------------------------------------------
logit_share
------------------------------------
(1) (2) (3)
--------------------------------------------------------------
(Intercept) -12.255*** -12.323*** -12.973***
(0.365) (0.382) (0.393)
price -0.255*** -0.283*** -0.552***
(0.026) (0.067) (0.080)
hppw -0.654 0.213 8.426**
(1.284) (2.298) (2.638)
FuelEfficiency 0.130*** 0.130*** 0.127***
(0.010) (0.010) (0.010)
size 0.182*** 0.187*** 0.236***
(0.019) (0.021) (0.022)
--------------------------------------------------------------
Estimator OLS IV IV
--------------------------------------------------------------
N 1,823 1,823 1,823
R2 0.222 0.222 0.180
Within-R2
First-stage F statistic 33.926 51.583
--------------------------------------------------------------
2.4.1 1st stage
= reg(
resultBLP1st
data, @formula(price ~ hppw + FuelEfficiency + size +
+ iv_BLP_own_FuelEfficiency + iv_BLP_own_size +
iv_BLP_own_hppw + iv_BLP_other_FuelEfficiency + iv_BLP_other_size
iv_BLP_other_hppw
),robust()
Vcov.
);= reg(
resultGH1st
data, @formula(price ~ hppw + FuelEfficiency + size +
+ iv_GH_own_FuelEfficiency + iv_GH_own_size +
iv_GH_own_hppw + iv_GH_other_FuelEfficiency + iv_GH_other_size
iv_GH_other_hppw
),robust()
Vcov. );
regtable(resultBLP1st, resultGH1st)
---------------------------------------------------
price
---------------------
(1) (2)
---------------------------------------------------
(Intercept) -3.159 -1.325**
(1.685) (0.423)
hppw 28.749*** 23.508***
(0.993) (1.547)
FuelEfficiency -0.011 -0.072***
(0.008) (0.014)
size 0.202*** 0.189***
(0.015) (0.020)
iv_BLP_own_hppw -0.728*
(0.336)
iv_BLP_own_FuelEfficiency -0.007***
(0.001)
iv_BLP_own_size 0.013***
(0.004)
iv_BLP_other_hppw 0.165
(0.255)
iv_BLP_other_FuelEfficiency 0.001**
(0.000)
iv_BLP_other_size -0.002
(0.003)
iv_GH_own_hppw -1.889***
(0.439)
iv_GH_own_FuelEfficiency 0.000
(0.000)
iv_GH_own_size -0.001***
(0.000)
iv_GH_other_hppw 0.423***
(0.096)
iv_GH_other_FuelEfficiency 0.000***
(0.000)
iv_GH_other_size 0.000***
(0.000)
---------------------------------------------------
N 1,823 1,823
R2 0.616 0.623
---------------------------------------------------
2.4.2 自己価格弾力性の計算
:own_elas_ols] = (
data[!, .== "price"] .*
resultOLS.coef[resultOLS.coefnames :, :price] .*
data[1 .- data[:, :share])
(
);:own_elas_ivblp] = (
data[!, .== "price"] .*
resultBLP.coef[resultBLP.coefnames :, :price] .*
data[1 .- data[:, :share])
(
);:own_elas_ivgh] = (
data[!, .== "price"] .*
resultGH.coef[resultGH.coefnames :, :price] .*
data[1 .- data[:, :share])
( );
describe(data[:, r"^own_elas"])
3×7 DataFrame
Row | variable | mean | min | median | max | nmissing | eltype |
---|---|---|---|---|---|---|---|
Symbol | Float64 | Float64 | Float64 | Float64 | Int64 | DataType | |
1 | own_elas_ols | -0.645328 | -3.22104 | -0.522324 | -0.179892 | 0 | Float64 |
2 | own_elas_ivblp | -0.717085 | -3.5792 | -0.580404 | -0.199895 | 0 | Float64 |
3 | own_elas_ivgh | -1.3967 | -6.9714 | -1.13048 | -0.389346 | 0 | Float64 |
2.5 推定結果の応用
2.5.1 需要曲線と収入曲線を書く
:xi_fit] = resultGH.residuals; data[!,
= 197
targetNameID
data[.== 2016) .&
(data.year .== targetNameID),
(data.NameID :]
1×54 DataFrame
Row | Maker | Type | Name | year | Sales | Model | price | kata | weight | FuelEfficiency | HorsePower | overall_length | overall_width | overall_height | HH | CPI | size | hppw | NameID | inside_total | outside_total | share | share0 | hppw_sum_own | FuelEfficiency_sum_own | size_sum_own | hppw_sqr_sum_own | FuelEfficiency_sqr_sum_own | size_sqr_sum_own | group_n | hppw_sum_mkt | FuelEfficiency_sum_mkt | size_sum_mkt | hppw_sqr_sum_mkt | FuelEfficiency_sqr_sum_mkt | size_sqr_sum_mkt | mkt_n | iv_BLP_own_hppw | iv_BLP_other_hppw | iv_GH_own_hppw | iv_GH_other_hppw | iv_BLP_own_FuelEfficiency | iv_BLP_other_FuelEfficiency | iv_GH_own_FuelEfficiency | iv_GH_other_FuelEfficiency | iv_BLP_own_size | iv_BLP_other_size | iv_GH_own_size | iv_GH_other_size | logit_share | own_elas_ols | own_elas_ivblp | own_elas_ivgh | xi_fit |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
String15 | String7 | String31 | Int64 | Int64 | String | Float64 | String15 | Int64 | Float64 | Int64? | Int64 | Int64 | Int64 | Int64? | Float64? | Float64 | Float64 | Int64 | Int64 | Int64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Int64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Int64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64? | |
1 | Toyota | Regular | アルファード | 2016 | 37069 | 2.5 X | 3.198 | DBA-AGH30W | 1920 | 11.6 | 182 | 4915 | 1850 | 1880 | 56950757 | 99.9 | 17.0944 | 0.0947917 | 197 | 3983817 | 52966940 | 0.000650896 | 0.930048 | 3.84238 | 721.0 | 499.764 | 0.388206 | 15287.4 | 6520.21 | 40 | 16.7128 | 3176.1 | 1974.4 | 1.90526 | 67586.0 | 24147.5 | 169 | 3.74759 | 12.8704 | 0.0191723 | 0.236157 | 709.4 | 2455.1 | 3942.58 | 12698.6 | 482.67 | 1474.64 | 1122.6 | 4907.45 | -7.26464 | -0.815288 | -0.905944 | -1.76455 | 1.16401 |
= range(0.3, 5, step = 0.05);
priceVec = calculateSales.(
salesVec 2016, targetNameID, Ref(data), Ref(resultGH)
priceVec, );
plot(salesVec, priceVec, xticks = [50000, 100000, 150000], legend = false)
xlabel!("Sales")
ylabel!("Price (million JPY)")
plot(priceVec, priceVec .* salesVec / 1000, legend = false)
xlabel!("Price (million JPY)")
ylabel!("Revenue (billion JPY)")
2.5.2 収入を最大化する価格
@time resultMaxSales = optimize(
-> - calculateSales(
x 1],
x[2016,
targetNameID,
data,
resultGH* x[1],
) 1.0]
[
);
@printf("Revenue-maximizing price: %.3f \n", resultMaxSales.minimizer[1])
@printf("Maximized revenue : %.3f", -resultMaxSales.minimum)
0.386725 seconds (1.42 M allocations: 97.367 MiB, 4.09% gc time, 98.43% compilation time)
Revenue-maximizing price: 1.814
Maximized revenue : 144273.836