# delimit ;
/*  Session setup.  Every command from here on is terminated by a semicolon.
    set more 1 is the legacy numeric form of the more setting (presumably "more off", so the
    run does not pause for keystrokes - confirm for the Stata version in use).
    matsize is raised to 800; note that a full set of plant dummies is generated further down.  */
set more 1 ;
set matsize 800 ;
/*  close any log left open by an earlier (possibly aborted) run; without this the log using
    below stops with "log file already open".  capture suppresses the error when no log is open.  */
capture log close ;
log using d:\postal05\bozzo\estimate\datastep.log, replace ; 


/*  set up data for labor demand estimation by merging information from k56 data, neighboring area data,
     3digit mods data that gives fhpin and fhpout, capital data.  Write a data set called regest.dta 
    The sorting operations and numbers are:

	Letters - OCR (04), LSM (07), Manual (12), BCS (32 = 01 + 02)
	Flats   - Manual (11), FSM (33 = 05 + 06), AFSM100 (39)
	Priority - Manual (14) 
	Parcels - SPBS (34 = 09 + 10), Manual (13)  */ 


/*   merge the k56 data set (combine9904) with the data set that contains fhp disaggregated
       by incoming and outgoing sorts (fhp2.dta).
       In combine9904 qtr holds the values 19991 ... 20044 (four-digit year followed by the
       quarter digit).  It is recoded to the quarter digit 1-4 so that idnum fy qtr can be
       used as the merge key.  Values outside that list are left untouched.  */

use d:\postal05\bozzo\section1\data\combine9904.dta ;
drop _merge ;
forvalues q = 1/4 { ;
    replace qtr = `q' if inlist(qtr, 19990 + `q', 20000 + `q', 20010 + `q', 20020 + `q', 20030 + `q', 20040 + `q') ;
} ;

/*   old-style match merge: the master is sorted on the key first, the match results are
       tabulated for inspection, and the sort order is restored afterwards  */
sort idnum fy qtr ;
merge idnum fy qtr using d:\postal05\bozzo\3digitmods\fhp2.dta ;
tabulate _merge ;
sort idnum fy qtr ;


/*    merge with the capital stock data (98-03).  The _merge variable left by the previous
        merge has to be dropped before merging again.  FY1998 rows are discarded afterwards.
        NOTE(review): the match results of this merge are not tabulated before _merge is dropped.  */
drop _merge ;
merge idnum fy qtr using d:\postal05\bozzo\capital\kstock.dta ;
drop if fy==1998 ;
drop _merge ;
sort idnum fy qtr ;

/*  replace missing capital values with zero  - this is needed if we add capital types later
    (a sum with a missing component would itself be missing)  */

foreach kvar in klsm kfsm kpsm kmpbcs kdbcs kocr kcancel kother { ;
    replace `kvar' = 0 if `kvar' == . ;
} ;

/*  combined BCS capital = MPBCS + DBCS  */
generate kbcs = kmpbcs + kdbcs ;

/*  drop 47 plants with missing values for FHP letters or flats.
    The list is 27 individual plant ids, the contiguous range 331-347 (17 ids) and 349-351 (3 ids).
    count then reports the number of observations that remain.  */
drop if inlist(idnum, 13, 14, 17, 18, 27, 33, 34, 40, 41)
      | inlist(idnum, 44, 54, 56, 57, 117, 120, 133, 144, 177)
      | inlist(idnum, 197, 233, 315, 324, 325, 326, 327, 328, 329)
      | inrange(idnum, 331, 347)
      | inlist(idnum, 349, 350, 351) ;
count ;

/*  TPF is zero for manual operations.  replace TPF data for manual operations with the TPH data.
    Operations 11-14 are the manual flats, letters, parcels and priority sorts.  Each reported
    tpf variable is dropped and recreated as a copy of the matching tph variable.  */

foreach op in 11 12 13 14 { ;
    drop tpf`op' ;
    generate tpf`op' = tph`op' ;
} ;

/*  calculate total deliveries, ps's/stations/branches.
    dpt  = log of the sum of the seven delivery-point counts
    posb = log of the sum of the three post office / station / branch counts
    (ln and log are the same function in Stata)  */

generate dpt  = ln(curb + ndcbu + cent + other + rb + hct + pobox) ;
generate posb = ln(lgpo + smpo + sb) ;

/*  calculate trend, qtrly, year dummies.
    trend is the within-plant observation number after sorting on idnum fy qtr, so it counts
    the rows actually present for a plant.  tabulate , generate() creates one indicator per
    distinct value: dq* for quarters, dy* for fiscal years, dtime* for trend values.  */

tabulate qtr, generate(dq) ;
tabulate fy, generate(dy) ;
sort idnum fy qtr ;
quietly by idnum: generate trend = _n ;
tabulate trend, generate(dtime) ;

/*  generate plant dummies (id1, id2, ... one per distinct idnum; table output suppressed)  */

quietly tabulate idnum, generate(id) ;

/*   create FHP variables for operations that were not reported - this makes the loops
        work right, they are not used in any calculation.
        All placeholders are set to zero.  Total fhp is filled in for operations
        01 02 05 06 18 19 36 40; the incoming/outgoing split is filled in for
        07 09 10 18 19 36 40.  */

foreach op in 01 02 05 06 18 19 36 40 { ;
    generate fhp`op' = 0 ;
} ;

foreach op in 07 09 10 18 19 36 40 { ;
    generate fhpin`op' = 0 ;
} ;

foreach op in 07 09 10 18 19 36 40 { ;
    generate fhpout`op' = 0 ;
} ;

/*  scale fourth qtr output and hours variables to account for the extra AP.
    Every tpf, hrs, fhp, fhpin, fhpout and tph variable for the 20 operation codes below is
    multiplied by .75 in quarter-4 observations.  The outer loop runs over the variable stubs
    and the inner loop over the operation codes.  */

foreach stub in tpf hrs fhp fhpin fhpout tph { ;
    foreach op in 01 02 04 05 06 07 09 10 11 12 13 14 18 19 32 33 34 36 39 40 { ;
        replace `stub'`op' = `stub'`op' *.75 if qtr==4 ;
    } ;
} ;
     
/*  scale output measures to millions of pieces and hours to thousands.
    Every tpf, tph, fhp, fhpin, fhpout and hrs variable for the 20 operation codes below is
    divided by 1000 in all observations.  */

foreach stub in tpf tph fhp fhpin fhpout hrs { ;
    foreach op in 01 02 04 05 06 07 09 10 11 12 13 14 18 19 32 33 34 36 39 40 { ;
        replace `stub'`op' = `stub'`op'/1000 ;
    } ;
} ;
    
     
/*  define fhp/tph/tpf/hrs for letters flats.
    Letters = OCR (04) + LSM (07) + manual (12) + BCS (32).
    Flats   = manual (11) + FSM (33) + AFSM100 (39).
    The priority and parcel aggregates are created as zero placeholders.  */

generate fhplet   = fhp04 + fhp07 + fhp12 + fhp32 ;
generate fhpinlet = fhpin04 + fhpin12 + fhpin32 ;
/*  did not get the data to divide fhp07 (LSM operation) between incoming and outgoing.  Assign it all to outgoing */
generate fhpoutlet = fhpout04 + fhp07 + fhpout12 + fhpout32 ;

foreach stub in fhp fhpin fhpout { ;
    generate `stub'flt = `stub'11 + `stub'33 + `stub'39 ;
} ;
generate fhppri = 0 ;
generate fhppar = 0 ;

/*  hours, tph and tpf follow one common pattern  */
foreach stub in hrs tph tpf { ;
    generate `stub'let = `stub'04 + `stub'07 + `stub'12 + `stub'32 ;
    generate `stub'flt = `stub'11 + `stub'33 + `stub'39 ;
    generate `stub'pri = 0 ;
    generate `stub'par = 0 ;
} ;

/*  generate wages - will generate missing value when hours = 0 or missing.
    wage = dollars / hours for groups 11, 12, 14, 13 and 17, followed by the log wage used in
    estimation.  Note that these use the dollarsNN / hoursNN variables, not the hrsNN
    workhour variables used elsewhere in this file.  */

foreach grp in 11 12 14 13 17 { ;
    generate wage`grp' = dollars`grp'/hours`grp' ;
} ;

generate wlet    = ln(wage11) ;  /*  used for  ocr, bcs, bcs/dbcs operators  */
generate wfsm    = ln(wage12) ;  /*  used for fsm and afsm operators   */
generate wman    = ln(wage14) ;  /*  used for manual letters and flats  */
generate wpar    = ln(wage13) ;  /*  used for mechanized parcels and priority */
generate wcancel = ln(wage17) ;  /*  used for cancellation  */

/*  K data from Bozzo data sets.  Scale the 4th quarter and create some aggregate K stocks by shape.
    Steps, in this order:
      1. multiply each reported qi* series by .75 in quarter-4 observations
      2. build the aggregates from the already scaled components
      3. divide every series, aggregates included, by 1,000,000  */

local kbase "qifsm881 qifsm1000 qiafsm100 qimpbcs qidbcs qiocr qipse qiahe qimhe qicap2 qicanc" ;

foreach kvar of local kbase { ;
    replace `kvar' = .75* `kvar' if qtr==4 ;
} ;

generate qifsm = qifsm881 + qifsm1000 ;
generate qiflt = qifsm881 + qifsm1000 + qiafsm100 ;
generate qibcs = qimpbcs + qidbcs ;
generate qilet = qimpbcs + qidbcs + qiocr ;

/*  divide all K data by 1,000,000  */

foreach kvar in `kbase' qifsm qiflt qibcs qilet { ;
    replace `kvar' = `kvar'/1000000 ;
} ;

 /*  create log variables for all hours, tpf, fhp for letters and flats.
     The log of zero or of a missing value is missing, so operations with no activity
     get missing logs.  */

generate lhlet = ln(hrslet) ;
generate lhflt = ln(hrsflt) ;

local opcodes "01 02 04 05 06 07 09 10 11 12 13 14 18 19 32 33 34 36 39 40" ;

/*  log hours by operation  */
foreach op of local opcodes { ;
    generate lhrs`op' = ln(hrs`op') ;
} ;

/*  log fhp: letter and flat totals, then the incoming / outgoing splits  */
foreach agg in let flt inlet outlet inflt outflt { ;
    generate lfhp`agg' = ln(fhp`agg') ;
} ;

/*  log tpf by operation, then the letter and flat aggregates  */
foreach op of local opcodes { ;
    generate ltpf`op' = ln(tpf`op') ;
} ;
generate ltpflet = ln(tpflet) ;
generate ltpfflt = ln(tpfflt) ;

/*  NOTE(review): the lk* names suggest log capital, but both variables are built from TPF
    (mechanized flats 33 + 39, and letters 32 + 04), not from the qi* capital series.
    Confirm that this is intended.  */
generate lkflt = ln(tpf33 + tpf39) ;
generate lklet = ln(tpf32 + tpf04) ;


/*  create relative wages - normalized by manual.
    The wage variables are already in logs, so each difference is the log of the wage ratio
    relative to the manual wage (wman).  */

foreach grp in let fsm par { ;
    generate rw`grp' = w`grp' - wman ;
} ;

/*  technology indicators  - based on tpf=0.
    tech = 1 when the operation shows positive TPF in the quarter, 0 when TPF is zero.
    NOTE(review): Stata treats a missing value as larger than any number, so a missing tpf
    also yields tech = 1.  Left unchanged because it matches the tpf==0 convention used for
    the neard and new39 flags - confirm this is the intended treatment of missing TPF.  */

foreach op in 01 02 05 06 39 { ;
    generate tech`op' = 0 ;
    replace tech`op' = 1 if tpf`op'>0 ;
} ;

/*  changes over time in the technology indicators- identify periods when the 
         technology changes.
    The difference is taken within plant (by idnum).  Without the by prefix the first
    observation of each plant would be compared with the last observation of the previous
    plant and could show a spurious change.  With it, the first observation of each plant
    has a missing dtech value.  */

sort idnum fy qtr ;
foreach op in 01 02 05 06 39 { ;
    by idnum: generate dtech`op' = tech`op' - tech`op'[_n-1] ;
} ;

/* identify observations within a year of MPBCS (category 01) being dropped, and
   observations within a year of FSM881 (category 05) being dropped.

   An observation is flagged when TPF for the operation is zero in the current quarter or in
   any of the next four quarters of the SAME plant.  The leads are evaluated under by idnum,
   where a subscript that runs past the last observation of the plant returns missing, and
   missing==0 is false.  For a plant with 24 quarterly observations this reproduces the
   earlier trend<=20 / 21 / 22 / 23 / 24 case split exactly.  For a plant with fewer
   observations it no longer reads the first quarters of the next plant.  */

sort idnum trend ;
foreach op in 01 05 { ;
    generate neard`op' = 0 ;
    by idnum: replace neard`op' = 1 if tpf`op'==0 | tpf`op'[_n+1]==0 | tpf`op'[_n+2]==0
                                     | tpf`op'[_n+3]==0 | tpf`op'[_n+4]==0 ;
} ;

/*  identify observations at the beginning of the introduction of AFSM (39) - tpf39=0 in first 4 obs for all.
    Flagged when tpf39 is zero now or in any of the previous four quarters of the same plant.
    The lags are also taken under by idnum so that the first quarters of a plant do not look
    back into the last quarters of the previous plant.  */

generate new39 = 0 ;
by idnum: replace new39 = 1 if tpf39==0 | tpf39[_n-1]==0 | tpf39[_n-2]==0
                             | tpf39[_n-3]==0 | tpf39[_n-4]==0 ;

/*  identify observations that have hrs>0 but qicapital=0 in an operation.  these observations should not
      be used in estimation - it is usually the case that the capital data has not been reported.
      ok* = 1 marks a usable observation, ok* = 0 marks hours reported with zero capital.
      The AFSM test names qiafsm100 in full.  The short form qiafsm is not a variable created
      in this file and only resolved through variable-name abbreviation, which breaks as soon
      as another variable starting with qiafsm is added.
      NOTE(review): a missing hrs value compares as greater than zero, so missing hours with
      zero capital are also marked 0.  A missing capital value is never marked 0.  */

generate okbcs = 1 ;
replace okbcs = 0 if hrs32>0 & qibcs==0 ;
generate okfsm = 1 ;
replace okfsm = 0 if hrs33>0 & qifsm==0 ;
generate okafsm = 1 ;
replace okafsm = 0 if hrs39>0 & qiafsm100==0 ;
generate okocr = 1 ;
replace okocr = 0 if hrs04>0 & qiocr==0 ;

/*  output to a data set used for estimation.
    The file is written to the current working directory (no path is given) and replaces
    any existing regest.dta.  */

sort idnum fy qtr ;
save regest.dta, replace ;

/*  close the log opened at the top of this file so that it is flushed to disk and a later
    log using in the same session does not fail with "log file already open"  */
log close ;


