# Load the data quietly
open WHR2019.csv --quiet

# Declare the dataset a panel, with Countryname as the
# unit variable and Year as the time variable
setobs Countryname Year --panel-vars

# The series that must both be non-missing for an
# observation to be counted below
list L = LifeLadder LogGDPpercapita

# Tabulate the usable observations per Year: column 1 of
# yrvals holds each distinct Year, column 2 the number of
# observations with valid values for every member of L
matrix yrvals = values(Year)
yrvals = yrvals ~ zeros(rows(yrvals), 1)
loop i=1..rows(yrvals) --quiet
    yr = yrvals[i,1]
    # --replace substitutes this restriction for the one
    # set on the previous iteration
    smpl Year == yr --restrict --replace
    # ok(L) is 1 where no member of L is missing, else 0
    yrvals[i,2] = sum(ok(L))
endloop

# go back to the full dataset and show the table
smpl full
print yrvals

# Recover GDP per capita in levels from its log, then add
# its square (the "square" command names it sq_GDPpc)
series GDPpc = exp(LogGDPpercapita)
square GDPpc

# Keep a single cross section of countries: 2017, a recent
# year that has a good number of non-missing values
smpl Year == 2017 --restrict

# Model 1: LifeLadder linear in the level of GDP per capita
ols LifeLadder const GDPpc

# Model 2: LifeLadder on the log of GDP per capita;
# save the fitted values for plotting
ols LifeLadder const LogGDPpercapita
series logfit = $yhat

# Model 3: LifeLadder on a quadratic in GDP per capita;
# save the fitted values for plotting
ols LifeLadder const GDPpc sq_GDPpc
series quadfit = $yhat

# Plot actual LifeLadder together with each of the two
# better fits, against the level of GDP per capita
gnuplot LifeLadder logfit GDPpc --output=display
gnuplot LifeLadder quadfit GDPpc --output=display