/************************
Robocall Study
insheet2008.do

file created: March 4, 2012
file updated: March 11, 2012

This program takes the 2008 csv files (which have had candidate names
replaced with party names), turns each of them into a .dta file, appends
them into one big .dta file (master_data2008.dta), and calculates some
basic district-level variables, such as voter turnout.

Requires the user-written command -renvars-.
*************************/

/*********
1. Inputs and settings
*********/
clear
clear matrix
clear mata
set mem 800m
set more off

* Machine-specific locations: edit these two locals, and nothing else,
* when running on another machine.
*   path     - directory holding the 2008 csv files; every .dta file is
*              written here as well.
*   listfile - text file that receives the list of csv file names.
* NOTE(review): listfile points into a different user's directory than
* path -- confirm that it exists on the machine running this file.
local path "C:\Users\Anke\Dropbox\robocalls\2008\\"
local listfile "\Users\Tom\Documents\economics\projects\robocalls\code\filelist.csv"

* All file names below are relative to this directory. The working
* directory is set once and never changed again, so every csv file and
* master_data2008.dta are always read from and written to the same place.
cd "`path'"

/********
2. Create the file list (written outside the data directory, so that it
   is not itself matched by *.csv)
********/
! dir *.csv /a-d /b > "`listfile'"

/********
3. Open the list and read the first file name
********/
* A previous run that stopped with an error leaves the handle open;
* close it quietly so that the -file open- below cannot fail on re-runs.
capture file close myfile
file open myfile using "`listfile'", read

* r(eof) is copied into a local immediately after each read, so that no
* command executed between the read and the loop test can disturb it.
file read myfile line
local eof = r(eof)
local nfiles = 0

/********
4. Process every file and combine
********/
while `eof' == 0 {
    disp("`line'")

    insheet using "`line'", comma names

    * district_id is not retained by the -keep- below, but it is counted
    * in r(k), and the "- 8" offset used for ncand relies on that.
    gen district_id = substr("`line'",26,5)
    qui describe
    local ncand = `r(k)' - 8

    rename electoraldistrictnumbernumrodeci id
    rename electoraldistrictnamenomdecircon name
    renvars pollingstationnamenomdubureaudes-rejectedballotsbulletinsrejets \ pollingstationname v1-v`ncand' rejectedballots
    drop if missing(v2)
    destring(v1), replace

    * Column totals of v1..v`ncand', sorted ascending, so that the last
    * row of A is the largest total, the one before it the runner-up, etc.
    mat A = J(`ncand', 1, 0)
    forvalues n = 1/`ncand' {
        qui sum v`n'
        mat A[`n',1] = `r(sum)'
    }
    mata: st_matrix("A", sort(st_matrix("A"), 1))

    gen winnervotes = A[`ncand',1]
    gen secondvotes = A[`ncand'-1,1]
    gen thirdvotes = A[`ncand'-2,1]
    gen votemargin = winnervotes - secondvotes

    * District-wide totals over all remaining rows (total() is the
    * documented name of the egen function formerly called sum()).
    egen totalvotes = total(totalvotestotaldesvotes)
    egen invalid = total(rejectedballots)
    egen voters = total(electorslecteurs)
    gen perc = invalid/totalvotes
    gen turnout = totalvotes/voters

    * Every kept variable is constant within the file, so one row per id
    * carries all the information.
    keep totalvotes turnout invalid perc id name winnervotes secondvotes thirdvotes votemargin
    duplicates drop id, force
    save "`line'.dta", replace

    * The first file starts the master data set; every later file is put
    * in front of what has been accumulated so far.
    if `nfiles' > 0 {
        append using master_data2008.dta
    }
    save master_data2008.dta, replace
    local nfiles = `nfiles' + 1
    drop _all

    file read myfile line
    local eof = r(eof)
}
file close myfile

/********
5. Finalize the combined file
********/
if `nfiles' == 0 {
    * Do not touch a master file left over from an earlier run.
    display as error "no csv files were listed in `listfile'"
    exit 601
}

use master_data2008.dta, clear
gen year = 2008
save master_data2008.dta, replace