Introduction to Panel Threshold Model

2 minute read | Published:

Threshold Model Presentation Slides: download

  • Part Two: R code for this lecture

First of all, we need to generate the data of y,x,n

set.seed(123) # I set the seed for the sake of repeatability
e=rnorm(100,mean=0,sd=1)
x=rnorm(100,mean=0,sd=3^2)
n=1:100
y=rep(0,times=100)
y[1:50]=1+2*x[1:50]+e[1:50]
y[51:100]=1-2*x[51:100]+e[51:100]
data=data.frame(n,x,y,e)

Then we can plot out the relationship between y and n, as well as y and x.

library(ggplot2)
p1=ggplot(data,aes(x=n,y=y))+geom_point()
p2=ggplot(data,aes(x=x,y=y))+geom_point()
p1
p2

Regression results Suppose we do not aware the existance of thereshold effect

reg1=lm(y~x,data)
summary(reg1)

If we know the thereshold and seprate the whole data frame into two groups,then conduct regressions separately.

data1=subset(data, n<=50)
data2=subset(data,n>50)
reg2=lm(y~x,data1)
reg3=lm(y~x,data2)
summary(reg2)
summary(reg3)

If we do not know where the thereshold is, then what shoudl we do?

reg=list()
rss=array()
for (i in 1:99)
{
  dum=x
  dum[(i+1):100]=0
  reg[[i]]=lm(y~x+dum)
  rss[i]=sum(residuals(reg[[i]])^2)
}
order(rss)

Rss=data.frame(n=1:99,rss)
ggplot(Rss,aes(x=n,y=rss))+geom_point()

Now if we have two theresholds, we can still use nested loop to discern these two theresholds. However Hensen proposed a more elegant solution. Prepare the data:

set.seed(456) # I set the seed for the sake of repeatability
e=rnorm(100,mean=0,sd=1)
x=rnorm(100,mean=0,sd=3^2)
n=1:100
y=rep(0,times=100)
y[1:30]=1+2*x[1:30]+e[1:30]
y[31:60]=1-2*x[31:60]+e[31:60]
y[61:100]=1+4*x[61:100]+e[61:100]
data=data.frame(n,x,y,e)

Then we can plot out the relationship between y and n, as well as y and x.

library(ggplot2)
p1=ggplot(data,aes(x=n,y=y))+geom_point()
p2=ggplot(data,aes(x=x,y=y))+geom_point()
p1
p2

Now we use the method proposed by Hensen to discern theresholds. Pinpoint the first thereshold:

reg=list()
rss1=array()
for (i in 1:99)
{
  dum=x
  dum[(i+1):100]=0
  reg[[i]]=lm(y~x+dum)
  rss1[i]=sum(residuals(reg[[i]])^2)
}
which.min(rss1)
#plot the figure
Rss1=data.frame(n=1:99,rss1)
ggplot(Rss1,aes(x=n,y=rss1))+geom_point()

Pinpoint the second thereshold

reg2=list()
rss2=array()
for(i in 1:99)
{
  dum1=x
  dum2=x
  left=min(i,which.min(rss1))
  right=max(i,which.min(rss1))
  dum1[(left+1):100]=0
  dum2[1:right]=0
  reg2[[i]]=lm(y~x+dum1+dum2)
  rss2[i]=sum(residuals(reg2[[i]])^2)
}
which.min(rss2)
#plot the figure
Rss2=data.frame(n=1:99,rss2)
ggplot(Rss2,aes(x=n,y=rss2))+geom_point()

Pinpoint the first thereshold again

reg3=list()
rss3=array()
for(i in 1:99)
{
  dum1=x
  dum2=x
  left=min(i,which.min(rss2))
  right=max(i,which.min(rss2))
  dum1[(left+1):100]=0
  dum2[1:right]=0
  reg3[[i]]=lm(y~x+dum1+dum2)
  rss3[i]=sum(residuals(reg3[[i]])^2)
}
which.min(rss3)
#plot the figure
Rss3=data.frame(n=1:99,rss3)
ggplot(Rss3,aes(x=n,y=rss3))+geom_point()

put those aboving figures into one picture.

all=rbind(data.frame(n=1:99,rss=rss1,t="loop 1"),data.frame(n=1:99,rss=rss2,t="loop 2"),data.frame(n=1:99,rss=rss3,t="loop 3"))
p=ggplot(all, aes(x=n,y=rss))
p+geom_path(aes(position=t,color=t))+geom_point(aes(position=t,color=t))

Bootstrap, in this case, we only consider one threshold.

set.seed(123) # I set the seed for the sake of repeatability
e=rnorm(100,mean=0,sd=1)
x=rnorm(100,mean=0,sd=3^2)
n=1:100
y=rep(0,times=100)
y[1:50]=1+2*x[1:50]+e[1:50]
y[51:100]=1-2*x[51:100]+e[51:100]
data=data.frame(n,x,y,e)

breg1=lm(y~x,data)
s0=sum(residuals(breg1)^2)
library(boot)
fvalue=function(data,indices){
  d=data[indices,]
  bregloop=list()
  brss=array()
  for (i in 1:99)
  {
    dum=d$x
    dum[(i+1):100]=0
    bregloop[[i]]=lm(y~x+dum,data=d)
    brss[i]=sum(residuals(bregloop[[i]])^2)
  }
  a=which.min(brss)
  dum=d$x
  dum[(a+1):100]=0
  breg2=lm(y~x+dum,data=d)
  s1=sum(residuals(breg2)^2)
  f=(s0/s1-1)*(100-1)
  return(f)
}

result=boot(data=data,fvalue,R=99)
f=result$t
result=boot(data=data,fvalue,R=99,formula1=y~x+dum)
f=result$t

The real f0

dum=data$x
dum[51:100]=0
reg4=lm(y~x+dum,data)
rss4=sum(residuals(reg4)^2)
f0=(s0/rss4-1)*(100-1)

table(f>f0) # so the p-value=0, we should reject H0

Last question, get the confidence intervals for gamma

LR=(rss-rss4)/(rss4/(100-1))
order(LR)
c=-2*log(1-sqrt(1-0.01)) #we set the asymptotic level alpha at 1%
LR[LR<c]