<- function(cusips,
clean_enhanced_trace
connection,start_date = as.Date("2002-01-01"),
end_date = today()) {
# Packages (required)
library(dplyr)
library(lubridate)
library(dbplyr)
library(RPostgres)
# Function checks ---------------------------------------------------------
# Input parameters
## Cusips
if (length(cusips) == 0 | any(is.na(cusips))) stop("Check cusips.")
## Dates
if (!is.Date(start_date) | !is.Date(end_date)) stop("Dates needed")
if (start_date < as.Date("2002-01-01")) stop("TRACE starts later.")
if (end_date > today()) stop("TRACE does not predict the future.")
if (start_date >= end_date) stop("Date conflict.")
## Connection
if (!dbIsValid(connection)) stop("Connection issue.")
# Enhanced Trace ----------------------------------------------------------
# Main file
<- tbl(
trace_all
connection,I("trace.trace_enhanced")
|>
) filter(cusip_id %in% cusips) |>
filter(trd_exctn_dt >= start_date & trd_exctn_dt <= end_date) |>
select(
cusip_id, msg_seq_nb, orig_msg_seq_nb,
entrd_vol_qt, rptd_pr, yld_pt, rpt_side_cd, cntra_mp_id,
trd_exctn_dt, trd_exctn_tm, trd_rpt_dt, trd_rpt_tm,
pr_trd_dt, trc_st, asof_cd, wis_fl,
days_to_sttl_ct, stlmnt_dt, spcl_trd_fl|>
) collect()
# Enhanced Trace: Post 06-02-2012 -----------------------------------------
# Trades (trc_st = T) and correction (trc_st = R)
<- trace_all |>
trace_post_TR filter(
== "T" | trc_st == "R"),
(trc_st >= as.Date("2012-02-06")
trd_rpt_dt
)
# Cancellations (trc_st = X) and correction cancellations (trc_st = C)
<- trace_all |>
trace_post_XC filter(
== "X" | trc_st == "C"),
(trc_st >= as.Date("2012-02-06")
trd_rpt_dt
)
# Cleaning corrected and cancelled trades
<- trace_post_TR |>
trace_post_TR anti_join(trace_post_XC,
join_by(
cusip_id, msg_seq_nb, entrd_vol_qt,
rptd_pr, rpt_side_cd, cntra_mp_id,
trd_exctn_dt, trd_exctn_tm
)
)
# Reversals (trc_st = Y)
<- trace_all |>
trace_post_Y filter(
== "Y",
trc_st >= as.Date("2012-02-06")
trd_rpt_dt
)
# Clean reversals
## match the orig_msg_seq_nb of the Y-message to
## the msg_seq_nb of the main message
<- trace_post_TR |>
trace_post anti_join(trace_post_Y,
join_by(
cusip_id,== orig_msg_seq_nb,
msg_seq_nb
entrd_vol_qt, rptd_pr, rpt_side_cd,
cntra_mp_id, trd_exctn_dt, trd_exctn_tm
)
)
# Enhanced TRACE: Pre 06-02-2012 ------------------------------------------
# Cancellations (trc_st = C)
<- trace_all |>
trace_pre_C filter(
== "C",
trc_st < as.Date("2012-02-06")
trd_rpt_dt
)
# Trades w/o cancellations
## match the orig_msg_seq_nb of the C-message
## to the msg_seq_nb of the main message
<- trace_all |>
trace_pre_T filter(
== "T",
trc_st < as.Date("2012-02-06")
trd_rpt_dt |>
) anti_join(trace_pre_C,
join_by(
cusip_id,== orig_msg_seq_nb,
msg_seq_nb
entrd_vol_qt, rptd_pr, rpt_side_cd,
cntra_mp_id, trd_exctn_dt, trd_exctn_tm
)
)
# Corrections (trc_st = W) - W can also correct a previous W
<- trace_all |>
trace_pre_W filter(
== "W",
trc_st < as.Date("2012-02-06")
trd_rpt_dt
)
# Implement corrections in a loop
## Correction control
<- nrow(trace_pre_W)
correction_control <- nrow(trace_pre_W)
correction_control_last
## Correction loop
while (correction_control > 0) {
# Corrections that correct some msg
<- trace_pre_W |>
trace_pre_W_correcting semi_join(trace_pre_T,
join_by(
cusip_id, trd_exctn_dt,== msg_seq_nb
orig_msg_seq_nb
)
)
# Corrections that do not correct some msg
<- trace_pre_W |>
trace_pre_W anti_join(trace_pre_T,
join_by(cusip_id, trd_exctn_dt,
== msg_seq_nb
orig_msg_seq_nb
)
)
# Delete msgs that are corrected and add correction msgs
<- trace_pre_T |>
trace_pre_T anti_join(trace_pre_W_correcting,
join_by(
cusip_id, trd_exctn_dt,== orig_msg_seq_nb
msg_seq_nb
)|>
) union_all(trace_pre_W_correcting)
# Escape if no corrections remain or they cannot be matched
<- nrow(trace_pre_W)
correction_control if (correction_control == correction_control_last) {
<- 0
correction_control
}<- nrow(trace_pre_W)
correction_control_last
}
# Clean reversals
## Record reversals
<- trace_pre_T |>
trace_pre_R filter(asof_cd == "R") |>
group_by(
cusip_id, trd_exctn_dt, entrd_vol_qt,
rptd_pr, rpt_side_cd, cntra_mp_id|>
) arrange(trd_exctn_tm, trd_rpt_dt, trd_rpt_tm) |>
mutate(seq = row_number()) |>
ungroup()
## Remove reversals and the reversed trade
<- trace_pre_T |>
trace_pre filter(is.na(asof_cd) | !(asof_cd %in% c("R", "X", "D"))) |>
group_by(
cusip_id, trd_exctn_dt, entrd_vol_qt,
rptd_pr, rpt_side_cd, cntra_mp_id|>
) arrange(trd_exctn_tm, trd_rpt_dt, trd_rpt_tm) |>
mutate(seq = row_number()) |>
ungroup() |>
anti_join(trace_pre_R,
join_by(
cusip_id, trd_exctn_dt, entrd_vol_qt,
rptd_pr, rpt_side_cd, cntra_mp_id, seq
)|>
) select(-seq)
# Agency trades -----------------------------------------------------------
# Combine pre and post trades
<- trace_post |>
trace_clean union_all(trace_pre)
# Keep angency sells and unmatched agency buys
## Agency sells
<- trace_clean |>
trace_agency_sells filter(
== "D",
cntra_mp_id == "S"
rpt_side_cd
)
# Agency buys that are unmatched
<- trace_clean |>
trace_agency_buys_filtered filter(
== "D",
cntra_mp_id == "B"
rpt_side_cd |>
) anti_join(trace_agency_sells,
join_by(
cusip_id, trd_exctn_dt,
entrd_vol_qt, rptd_pr
)
)
# Agency clean
<- trace_clean |>
trace_clean filter(cntra_mp_id == "C") |>
union_all(trace_agency_sells) |>
union_all(trace_agency_buys_filtered)
# Additional Filters ------------------------------------------------------
<- trace_clean |>
trace_add_filters mutate(days_to_sttl_ct2 = stlmnt_dt - trd_exctn_dt) |>
filter(
is.na(days_to_sttl_ct) | as.numeric(days_to_sttl_ct) <= 7,
is.na(days_to_sttl_ct2) | as.numeric(days_to_sttl_ct2) <= 7,
== "N",
wis_fl is.na(spcl_trd_fl) | spcl_trd_fl == "",
is.na(asof_cd) | asof_cd == ""
)
# Output ------------------------------------------------------------------
# Only keep necessary columns
<- trace_add_filters |>
trace_final arrange(cusip_id, trd_exctn_dt, trd_exctn_tm) |>
select(
cusip_id, trd_exctn_dt, trd_exctn_tm,
rptd_pr, entrd_vol_qt, yld_pt, rpt_side_cd, cntra_mp_id|>
) mutate(trd_exctn_tm = format(
as_datetime(
trd_exctn_tm, tz = "America/New_York"),
"%H:%M:%S"
)
)
# Return
return(trace_final)
}
Clean Enhanced TRACE with R
You are reading Tidy Finance with R. You can find the equivalent chapter for the sibling Tidy Finance with Python here.
This appendix contains code to clean enhanced TRACE with R. It is also available via the following GitHub gist. Hence, you could also source the function with devtools::source_gist("3a05b3ab281563b2e94858451c2eb3a4")
. We need this function in Chapter TRACE and FISD to download and clean enhanced TRACE trade messages following Dick-Nielsen (2009) and Dick-Nielsen (2014) for enhanced TRACE specifically. Relatedly, WRDS provides SAS code and there is Python code available by the project Open Source Bond Asset Pricing.
The function takes a vector of CUSIPs (in cusips
), a connection to WRDS (connection
) explained in Chapter 3, and a start and end date (start_date
and end_date
, respectively). Specifying too many CUSIPs will result in very slow downloads and a potential failure due to the size of the request to WRDS. The dates should be within the coverage of TRACE itself, i.e., starting after 2002, and the dates should be supplied using the class date. The output of the function contains all valid trade messages for the selected CUSIPs over the specified period.