**1 Preliminaries
*   Start from an empty dataset and switch off output paging so the do-file
*   can run unattended.
clear
set more off
*   NOTE(review): set mem is only meaningful on older Stata releases that need
*   memory allocated up front - confirm which Stata version this runs under.
set mem 1000m

**2 Create a file that chains year-months together in order
**
**  Note that to chain you have to always match later years to earlier years. That is the reason that the 
**  second loop below is nested.
**
**  Note that you must set the local variables for the beginning and ending year you want, i.e., the long
**  difference that you want to take; these locals govern both this and the next section. 

* Sample window (local b = first year-month, local e = last year-month) and the
* ordered list of year-months, written as YYYY.MM and split over three locals.
local b    = 1989.06
local e    = 2009.07
local list1 = "1989.06 1989.07 1990.01 1990.05 1990.07 1990.08 1990.10 1991.01 1991.02 1991.05 1991.07 1992.01 1992.05 1992.07 1993.01 1993.02 1993.06 1993.07 1993.08 1993.11 1993.12 1994.01 1994.04 1994.06 1995.01 1995.07 1995.09 1996.01 1996.06 1996.07"
local list2 = "1996.11 1997.01 1997.02 1997.06 1997.07 1997.08 1998.01 1998.03 1998.04 1998.07 1998.08 1999.01 1999.07 2000.01 2000.03 2000.04 2000.07 2000.12 2001.01 2001.07 2002.01 2002.07 2002.08 2003.01 2003.02 2003.04 2003.07 2004.01 2004.02 2004.04" 
local list3 = "2004.07 2005.01 2005.07 2005.11 2006.01 2006.03 2006.04 2006.06 2006.07 2007.01 2007.07 2008.01 2008.04 2008.07 2008.10 2009.01 2009.07"

set more off
quietly {

	* Pass 1: write one extract of the concordance file per year-month.
	* Only rows whose effyr equals the year-month are kept, and every variable
	* gets the year-month as an integer (YYYYMM) appended to its name so that
	* extracts for different year-months can sit side by side after the joins
	* in pass 2. An extract is needed for every year-month because each one
	* serves, in turn, as the start of a chain.
	foreach ym in `list1' `list2' `list3' {
		local tag = int(`ym'*100)
		noisily display [`ym']
		use hts_concordances_20100522_02, clear
		drop if effyr!=`ym'
		rename obsolete obs`tag'
		foreach v in new setyr effyr {
			rename `v' `v'`tag'
		}
		order obs`tag' new`tag'
		sort obs`tag'
		save temp_xchain_`tag', replace
	}

	* Pass 2: for each starting year-month inside the window, look for later
	* year-months in which the new codes of the starting year-month show up as
	* obsolete codes. Later year-months are always matched to earlier ones,
	* which is why the second loop is nested. Once all later year-months have
	* been tried, only rows that matched at least once (merge code 3) are kept.
	foreach start in `list1' `list2' `list3' {

		local stag = int(`start'*100)

		* skip starting year-months outside the requested window
		if `start'<`b' | `start'>`e' {
			continue
		}

		use temp_xchain_`stag', clear
		rename obs`stag' obs

		foreach later in `list1' `list2' `list3' {
			* only year-months strictly after the start and inside the window
			if `later'<=`start' | `later'>`e' {
				continue
			}
			noisily display [`start'] " " [`later']
			local ltag = int(`later'*100)
			* temporarily present the starting new code under the name of the
			* later extract's obsolete code so it can act as the join key
			rename new`stag' obs`ltag'
			sort obs`ltag'
			joinby obs`ltag' using temp_xchain_`ltag', unmatched(master)
			noisily tab _merge
			drop if _merge==2
			* keep the merge code of this pair under its own name, then put the
			* starting new code back under its original name
			rename _merge _m`stag'`ltag'
			rename obs`ltag' new`stag'
		}

		* _mjunk makes sure the _m* wildcard matches at least one variable even
		* when no later year-month was joined
		gen _mjunk=0
		egen idx = rowmax(_m*)
		noisily tab idx
		drop if idx!=3
		sort obs
		drop _m*
		save temp2_xchain_`stag', replace
	}
}



**3 Assign single setyear to all members of a family
**
**Put the above chains, each of which starts with a different year-month between the beginning and ending 
**dates set in the locals below, back together into one file for the whole sample period. 
**The challenge here is to set a single setyr for all "families" revealed by the chain. 
**Note that there are two cases for a "family". In the first case, all members sprout from the same obsolete 
**code in some year. In the second, two sub-families in an early year are joined by a common code or set of codes 
**in a subsequent year. 
**The iteration of min commands below takes care of both cases by searching for the setyr for a family that
**covers all of its members.


* Sample window and year-month list, repeated here so that this section can be
* run without the previous one. bn / en are the window bounds as integers
* (YYYYMM) and are used to build file names.
* NOTE(review): b1 / b1n do not appear to be used by the steps below - confirm
* before removing.
set more off
local b    = 1989.06
local e    = 2009.07
local b1   = 1989.01
local list1 = "1989.06 1989.07 1990.01 1990.05 1990.07 1990.08 1990.10 1991.01 1991.02 1991.05 1991.07 1992.01 1992.05 1992.07 1993.01 1993.02 1993.06 1993.07 1993.08 1993.11 1993.12 1994.01 1994.04 1994.06 1995.01 1995.07 1995.09 1996.01 1996.06 1996.07"
local list2 = "1996.11 1997.01 1997.02 1997.06 1997.07 1997.08 1998.01 1998.03 1998.04 1998.07 1998.08 1999.01 1999.07 2000.01 2000.03 2000.04 2000.07 2000.12 2001.01 2001.07 2002.01 2002.07 2002.08 2003.01 2003.02 2003.04 2003.07 2004.01 2004.02 2004.04" 
local list3 = "2004.07 2005.01 2005.07 2005.11 2006.01 2006.03 2006.04 2006.06 2006.07 2007.01 2007.07 2008.01 2008.04 2008.07 2008.10 2009.01 2009.07"

local bn  = int(`b'*100)
local en  = int(`e'*100)
local b1n = int(`b1'*100)

* Stack the chain files: start from the file of the first year-month and append
* the file of every later year-month up to and including the last one.
use temp2_xchain_`bn', clear
foreach ym in `list1' `list2' `list3' {
	if `ym'<=`b' | `ym'>`e' {
		continue
	}
	display [`ym']
	local tag = int(`ym'*100)
	append using temp2_xchain_`tag'
}

* Keep the chain variables, remove exact duplicate rows, and record for each row
* the smallest setyr found along the chain and the number of non-missing new codes.
keep obs new* setyr* effyr*
capture duplicates drop
egen double setyr = rowmin(setyr*)
egen nchain = rownonmiss(new*)
rename obs obsolete
order obsolete setyr
sort obsolete
save temp2_xchain, replace

* Go from one row per chain (wide: one new/setyr pair per year-month suffix) to
* one row per obsolete-new pair (long). The suffix lands in effyr as an integer
* YYYYMM and is converted back to the YYYY.MM form afterwards.
use temp2_xchain, clear
drop effyr*
drop setyr
* t1 numbers the rows within each obsolete code so that (obsolete, t1)
* identifies a row for reshape
egen t1 = seq(), by(obsolete)
reshape long new setyr, i(obs t1) j(effyr)
rename effyr t2
* discard the year-month slots in which a chain has no entry
keep if new!=. | setyr!=.
gen double effyr = t2/100
drop t1 t2 nchain
duplicates drop

* Diagnostic only: spread of setyr within each obsolete code and within each
* new code; the helper variables are removed again right away.
egen osd = sd(setyr), by(obsolete)
egen nsd = sd(setyr), by(new)
summarize nsd osd
drop osd nsd


* Bring back the obsolete-new pairs from the concordance file that are not part
* of any chain. This has to happen before the family loop further down, because
* a pair outside every chain can still belong to a family.
* Only pairs whose effyr lies inside the sample window are retained.
sort obsolete new effyr
merge obsolete new effyr using hts_concordances_20100522_02
keep if effyr>=`b' & effyr<=`e'
tabulate _merge
drop _merge


*Family identification loop.
* Goal: give every member of a family the smallest setyr found anywhere in the
* family. t1 is the minimum setyr within each obsolete code. Each further pass
* takes the minimum of the previous result within each new code (even passes)
* or within each obsolete code (odd passes). The loop ends as soon as a pass
* changes no row, at which point the value is constant within both groupings.
egen double t1     = min(setyr), by(obsolete)
rename setyr oldsetyr
local zzz = 2
local stop = 0
while `stop'==0 {
  quietly {
	noisily display [`zzz']
	local zlag = `zzz'-1
	*mod(x,y) = x - y*int(x/y).
	if mod(`zzz',2)==0 {
		egen double t`zzz' = min(t`zlag'), by(new)
	}
	else {
		egen double t`zzz' = min(t`zlag'), by(obsolete)
	}
	* Converged when no row differs from the previous pass. Counting the
	* changed rows, and not the number of distinct values of a 0/1 flag,
	* also ends the loop when the dataset has no observations.
	count if t`zzz'!=t`zlag'
	local stop = (r(N)==0)
	* log: number of rows changed by this pass, then the stop flag
	noisily display r(N) " " [`stop']
	* the previous pass is no longer needed; drop it to save memory
	drop t`zlag'
	local zzz = `zzz'+1
  }
}
* zzz was advanced once more after the final pass, so the last variable created
* carries the index zzz-1
local yyy = `zzz'-1
gen double setyr = t`yyy'
keep obsolete effyr new setyr
* keep the YYYY.MM value as effyrmo and store the calendar year in effyr
rename effyr effyrmo
gen effyr = int(effyrmo)
duplicates drop
sort obsolete new effyrmo
save  hts_concordances_20100522_`bn'_`en', replace
* NOTE(review): without the comma option outsheet writes tab-delimited text,
* so this .csv file is tab-separated. Left unchanged in case downstream
* readers rely on it - confirm before adding the comma option.
outsheet using hts_concordances_20100522_`bn'_`en'.csv, replace	
