Read and Review Data

Goal= cleaned data frame with average percent coral cover per transect survey, with additional information needed for the master coral database.

# read in original csv file

rc_carib_substrate <- read_csv("sample_data/Reef_Check_Caribbean_Substrate.csv")
## Parsed with column specification:
## cols(
##   .default = col_character(),
##   `Longitude Degrees` = col_double(),
##   `Longitude Minutes` = col_double(),
##   `Longitude Seconds` = col_double(),
##   `Latitude Degrees` = col_double(),
##   `Latitude Minutes` = col_double(),
##   `Latitude Seconds` = col_double(),
##   Year = col_double(),
##   depth = col_double(),
##   total = col_double(),
##   `Errors?` = col_logical()
## )
## See spec(...) for full column specifications.
# clean column names- lowercase, no spaces
rc_carib_substrate <- janitor::clean_names(rc_carib_substrate)

# Look at number of sites and countries in this dataset

length(unique(rc_carib_substrate$reef_name))  #612 unique reefs
## [1] 612
length(unique(rc_carib_substrate$country))  #28 countries
## [1] 28
# substrate_codes
unique(rc_carib_substrate$substrate_code)
##  [1] "HC" "NI" "OT" "RB" "RC" "RK" "SC" "SD" "SI" "SP"
length(unique(rc_carib_substrate$substrate_code))  #10 types
## [1] 10
# Review a sample of data
rc_carib_substrate[1:10, ] %>% kableExtra::kable(caption = "Raw Reef Check Data") %>% 
    kable_styling(bootstrap_options = "condensed", full_width = F)
Raw Reef Check Data
reef_id reef_name longitude_degrees longitude_minutes longitude_seconds longitude_cardinal_direction latitude_degrees latitude_minutes latitude_seconds latitude_cardinal_direction year date depth substrate_code segment_code total errors what_errors country state_province_island city_town ocean
16.22.16W.15.14.21N Champagne Outer 61 22 16 W 15 14 21 N 2006 04-Mar-06 9.1 HC S2 7 TRUE did not complete S4 of substrate Dominica NA NA Atlantic
16.22.16W.15.14.21N Champagne Outer 61 22 16 W 15 14 21 N 2006 04-Mar-06 9.1 HC S1 5 TRUE did not complete S4 of substrate Dominica NA NA Atlantic
16.22.16W.15.14.21N Champagne Outer 61 22 16 W 15 14 21 N 2006 04-Mar-06 9.1 HC S3 8 TRUE did not complete S4 of substrate Dominica NA NA Atlantic
16.22.16W.15.14.21N Champagne Outer 61 22 16 W 15 14 21 N 2006 04-Mar-06 9.1 NI S1 0 TRUE did not complete S4 of substrate Dominica NA NA Atlantic
16.22.16W.15.14.21N Champagne Outer 61 22 16 W 15 14 21 N 2006 04-Mar-06 9.1 NI S2 0 TRUE did not complete S4 of substrate Dominica NA NA Atlantic
16.22.16W.15.14.21N Champagne Outer 61 22 16 W 15 14 21 N 2006 04-Mar-06 9.1 NI S3 0 TRUE did not complete S4 of substrate Dominica NA NA Atlantic
16.22.16W.15.14.21N Champagne Outer 61 22 16 W 15 14 21 N 2006 04-Mar-06 9.1 OT S2 0 TRUE did not complete S4 of substrate Dominica NA NA Atlantic
16.22.16W.15.14.21N Champagne Outer 61 22 16 W 15 14 21 N 2006 04-Mar-06 9.1 OT S3 0 TRUE did not complete S4 of substrate Dominica NA NA Atlantic
16.22.16W.15.14.21N Champagne Outer 61 22 16 W 15 14 21 N 2006 04-Mar-06 9.1 OT S1 0 TRUE did not complete S4 of substrate Dominica NA NA Atlantic
16.22.16W.15.14.21N Champagne Outer 61 22 16 W 15 14 21 N 2006 04-Mar-06 9.1 RB S1 15 TRUE did not complete S4 of substrate Dominica NA NA Atlantic



Cleaning Reef Check Data - Caribbean

# A: percent cover of each substrate code PER SEGMENT (ie out
# of 40 points)
rc_carib_substrate <- rc_carib_substrate %>% group_by(reef_id, 
    year, date, segment_code, substrate_code) %>% mutate(perc_seg = (total/40) * 
    100)

# Note, if try to do % by segment and by survey at same time,
# the values are the same because of the way group_by is
# done-- this is why i separated this process into 2 steps


# B: Percent cover of each substrate code PER SURVEY (out of
# 160 points, or average of all the segments for that survey)
# Note: 8/20/18- Found that for some locations, there was
# more than 1 survey done on a particular day. To keep
# surveys separate, need to also group by 'depth'
rc_carib_substrate <- rc_carib_substrate %>% group_by(reef_id, 
    year, date, substrate_code, depth) %>% mutate(perc_cov = round(mean(perc_seg), 
    2), sd_cov = round(sd(perc_seg), 2), n_samples = length(segment_code))  #this will tell me how many segments were completed!

# C: Clean columns to be in a good format to bind rows to
# master_coral dataframe

rc_carib_to_bind <- rc_carib_substrate %>% # 1 find distinct surveys- for reef check these are the
# unique variables
distinct(reef_id, date, depth, substrate_code, .keep_all = TRUE) %>% 
    
# 2 get rid of columns we no longer need
select(-segment_code, -total, -state_province_island, -city_town, 
    -perc_seg, -errors, -what_errors) %>% 
# 3 filter out the 2 substrate codes of interest, hard coral
# (HC) & algae (NI)
filter(substrate_code == "HC" | substrate_code == "NI") %>% 
# can't use spread on 2 cols, so combo % cov and sd,
# separated by _
unite(temp, perc_cov, sd_cov, sep = "_") %>% 
# 4 use spread to put the % cover of each substrate code into
# its own column
spread(substrate_code, temp) %>% 
# 5 break apart HC and NI data back into cover and sd
separate(HC, into = c("coral", "coral_std"), sep = "_", remove = TRUE) %>% 
    separate(NI, into = c("macro", "macro_std"), sep = "_", remove = TRUE) %>% 
    
# 6 Unite lat and lon col with deg_min_sec together
unite(lat, latitude_degrees, latitude_minutes, latitude_seconds, 
    sep = " ") %>% unite(lon, longitude_degrees, longitude_minutes, 
    longitude_seconds, sep = " ") %>% 
# 7 rename columns
rename(source_id = reef_id, depth_m = depth) %>% mutate(subregion = "caribbean", 
    method = "line_transect", orig_source = "reef_check", data_source = "reef_check", 
    t_length = 20) %>% separate(date, into = c("day", "month", 
    "year"), sep = "-", remove = TRUE)




# 8: Converting the lat and lon from deg_min_sec to dec_deg
# formats. Use measurements package to do the conversion:

rc_carib_to_bind$lat <- measurements::conv_unit(rc_carib_to_bind$lat, 
    "deg_min_sec", "dec_deg")

rc_carib_to_bind$lon <- measurements::conv_unit(rc_carib_to_bind$lon, 
    "deg_min_sec", "dec_deg")

# Make sure to put lat and lon as numeric
rc_carib_to_bind$lon <- as.numeric(rc_carib_to_bind$lon)
rc_carib_to_bind$lat <- as.numeric(rc_carib_to_bind$lat)

# 9: Turn year into 4 digits (note: this data is from
# 2006-2017 so we can just add 20 to the beginning of each
# element)

rc_carib_to_bind$year <- paste(20, rc_carib_to_bind$year, sep = "")
rc_carib_to_bind$day <- as.numeric(rc_carib_to_bind$day)
rc_carib_to_bind$year <- as.numeric(rc_carib_to_bind$year)

# make coral data numeric
rc_carib_to_bind$coral <- as.numeric(rc_carib_to_bind$coral)
rc_carib_to_bind$coral_std <- as.numeric(rc_carib_to_bind$coral_std)
rc_carib_to_bind$macro <- as.numeric(rc_carib_to_bind$macro)
rc_carib_to_bind$macro_std <- as.numeric(rc_carib_to_bind$macro_std)



Check Data

# Preview a sample of data:

rc_carib_to_bind[1:10, ] %>% arrange(reef_name) %>% kableExtra::kable(caption = "Coral and Macroalgae % Cover from Caribbean Reef Check Surveys") %>% 
    kable_styling(bootstrap_options = "condensed", full_width = F)
Coral and Macroalgae % Cover from Caribbean Reef Check Surveys
source_id reef_name lon longitude_cardinal_direction lat latitude_cardinal_direction depth_m day month year country ocean n_samples coral coral_std macro macro_std subregion method orig_source data_source t_length
32.23.48.2W.3.50.5.01S Buraco da Raquel 32.39672 W 3.834722 S 3.0 25 Sep 2008 Brazil Atlantic 4 1.25 2.50 44.38 13.44 caribbean line_transect reef_check reef_check 20
16.22.16W.15.14.21N Champagne Outer 61.37111 W 15.239167 N 9.1 4 Mar 2006 Dominica Atlantic 3 16.67 3.82 0.00 0.00 caribbean line_transect reef_check reef_check 20
16.22.16W.15.14.21N Champagne Outer 61.37111 W 15.239167 N 10.5 8 Apr 2006 Dominica Atlantic 3 22.50 4.33 0.00 0.00 caribbean line_transect reef_check reef_check 20
32.26.27.84W.3.50.38.57S Laje Dois Irmaos Funda 32.44106 W 3.844056 S 21.0 9 Nov 2010 Brazil Atlantic 4 20.62 8.98 36.25 7.77 caribbean line_transect reef_check reef_check 20
32.26.30W.3.50.45S Laje Dois Irmaos Funda 32.44167 W 3.845833 S 23.0 23 Nov 2009 Brazil Atlantic 4 21.88 11.06 6.25 6.29 caribbean line_transect reef_check reef_check 20
32.26.26.9W.3.50.49.09S Laje Dois Irmaos Rasa 32.44081 W 3.846972 S 15.0 21 Sep 2008 Brazil Atlantic 4 46.25 19.31 30.62 23.66 caribbean line_transect reef_check reef_check 20
32.25.21.69W.3.52.9.17N Sueste 32.42269 W 3.869222 S 3.0 13 Nov 2010 Brazil Atlantic 4 0.62 1.25 59.38 12.48 caribbean line_transect reef_check reef_check 20
32.25.21.77W.3.52.5.36S Sueste 32.42272 W 3.868167 S 3.0 19 Sep 2008 Brazil Atlantic 4 3.75 4.79 67.50 16.71 caribbean line_transect reef_check reef_check 20
32.25.21.77W.3.52.5.36S Sueste 32.42272 W 3.868167 S 1.6 20 Nov 2009 Brazil Atlantic 4 3.75 4.79 58.12 10.87 caribbean line_transect reef_check reef_check 20
32.25.30.5W.3.52.5.36S Sueste Trilha 32.42514 W 3.868167 S 2.0 25 Sep 2008 Brazil Atlantic 4 5.62 5.15 56.25 20.97 caribbean line_transect reef_check reef_check 20