Read and Review Data
Goal= cleaned data frame with average percent coral cover per transect survey, with additional information needed for the master coral database.
# read in original csv file
rc_carib_substrate <- read_csv("sample_data/Reef_Check_Caribbean_Substrate.csv")
## Parsed with column specification:
## cols(
## .default = col_character(),
## `Longitude Degrees` = col_double(),
## `Longitude Minutes` = col_double(),
## `Longitude Seconds` = col_double(),
## `Latitude Degrees` = col_double(),
## `Latitude Minutes` = col_double(),
## `Latitude Seconds` = col_double(),
## Year = col_double(),
## depth = col_double(),
## total = col_double(),
## `Errors?` = col_logical()
## )
## See spec(...) for full column specifications.
# clean column names- lowercase, no spaces
rc_carib_substrate <- janitor::clean_names(rc_carib_substrate)
# Look at number of sites and countries in this dataset
length(unique(rc_carib_substrate$reef_name)) #612 unique reefs
## [1] 612
length(unique(rc_carib_substrate$country)) #28 countries
## [1] 28
# substrate_codes
unique(rc_carib_substrate$substrate_code)
## [1] "HC" "NI" "OT" "RB" "RC" "RK" "SC" "SD" "SI" "SP"
length(unique(rc_carib_substrate$substrate_code)) #10 types
## [1] 10
# Review a sample of data
rc_carib_substrate[1:10, ] %>% kableExtra::kable(caption = "Raw Reef Check Data") %>%
kable_styling(bootstrap_options = "condensed", full_width = F)
Raw Reef Check Data
reef_id
|
reef_name
|
longitude_degrees
|
longitude_minutes
|
longitude_seconds
|
longitude_cardinal_direction
|
latitude_degrees
|
latitude_minutes
|
latitude_seconds
|
latitude_cardinal_direction
|
year
|
date
|
depth
|
substrate_code
|
segment_code
|
total
|
errors
|
what_errors
|
country
|
state_province_island
|
city_town
|
ocean
|
16.22.16W.15.14.21N
|
Champagne Outer
|
61
|
22
|
16
|
W
|
15
|
14
|
21
|
N
|
2006
|
04-Mar-06
|
9.1
|
HC
|
S2
|
7
|
TRUE
|
did not complete S4 of substrate
|
Dominica
|
NA
|
NA
|
Atlantic
|
16.22.16W.15.14.21N
|
Champagne Outer
|
61
|
22
|
16
|
W
|
15
|
14
|
21
|
N
|
2006
|
04-Mar-06
|
9.1
|
HC
|
S1
|
5
|
TRUE
|
did not complete S4 of substrate
|
Dominica
|
NA
|
NA
|
Atlantic
|
16.22.16W.15.14.21N
|
Champagne Outer
|
61
|
22
|
16
|
W
|
15
|
14
|
21
|
N
|
2006
|
04-Mar-06
|
9.1
|
HC
|
S3
|
8
|
TRUE
|
did not complete S4 of substrate
|
Dominica
|
NA
|
NA
|
Atlantic
|
16.22.16W.15.14.21N
|
Champagne Outer
|
61
|
22
|
16
|
W
|
15
|
14
|
21
|
N
|
2006
|
04-Mar-06
|
9.1
|
NI
|
S1
|
0
|
TRUE
|
did not complete S4 of substrate
|
Dominica
|
NA
|
NA
|
Atlantic
|
16.22.16W.15.14.21N
|
Champagne Outer
|
61
|
22
|
16
|
W
|
15
|
14
|
21
|
N
|
2006
|
04-Mar-06
|
9.1
|
NI
|
S2
|
0
|
TRUE
|
did not complete S4 of substrate
|
Dominica
|
NA
|
NA
|
Atlantic
|
16.22.16W.15.14.21N
|
Champagne Outer
|
61
|
22
|
16
|
W
|
15
|
14
|
21
|
N
|
2006
|
04-Mar-06
|
9.1
|
NI
|
S3
|
0
|
TRUE
|
did not complete S4 of substrate
|
Dominica
|
NA
|
NA
|
Atlantic
|
16.22.16W.15.14.21N
|
Champagne Outer
|
61
|
22
|
16
|
W
|
15
|
14
|
21
|
N
|
2006
|
04-Mar-06
|
9.1
|
OT
|
S2
|
0
|
TRUE
|
did not complete S4 of substrate
|
Dominica
|
NA
|
NA
|
Atlantic
|
16.22.16W.15.14.21N
|
Champagne Outer
|
61
|
22
|
16
|
W
|
15
|
14
|
21
|
N
|
2006
|
04-Mar-06
|
9.1
|
OT
|
S3
|
0
|
TRUE
|
did not complete S4 of substrate
|
Dominica
|
NA
|
NA
|
Atlantic
|
16.22.16W.15.14.21N
|
Champagne Outer
|
61
|
22
|
16
|
W
|
15
|
14
|
21
|
N
|
2006
|
04-Mar-06
|
9.1
|
OT
|
S1
|
0
|
TRUE
|
did not complete S4 of substrate
|
Dominica
|
NA
|
NA
|
Atlantic
|
16.22.16W.15.14.21N
|
Champagne Outer
|
61
|
22
|
16
|
W
|
15
|
14
|
21
|
N
|
2006
|
04-Mar-06
|
9.1
|
RB
|
S1
|
15
|
TRUE
|
did not complete S4 of substrate
|
Dominica
|
NA
|
NA
|
Atlantic
|
Cleaning Reef Check Data - Caribbean
- Cleaning steps:
- A: Calculate percent cover of each substrate code PER SEGMENT
- 1- Group by reef id, year, data, segment code, substrate code
- 2- Create the “perc_seg” variable that is the % cover of each substrate code per segment
- B: Calculate percent cover of each substrate code PER SURVEY
- 1- Group by reef_id, year, date, and substrate code
- 2- Create variable “perc_cov”, which is the average percent cover for the transect, based off the “perc_seg” column ; Create n_samples by counting the number of segment codes, based on the grouping
- C: Clean columns to bind later with master_coral
- 1- Pick distinct columns to keep
- 2- Remove cols don’t need
- 3- Combine cover and sd data for substrate code- need to do this because can’t use spread on 2 cols
- 4- Spread data so coral and macro have their own column
- 5- Split the cover & sd data into 2 separate columns
- 6- Unite deg_min_sec data into 1 col for lat and lon- convert later
- 7- Rename and mutate some columns
- 8- Convert lat and lon into correct format
- 9- Final column type cleaning (numeric values, create 4 digit years)
# A: percent cover of each substrate code PER SEGMENT (ie out
# of 40 points)
rc_carib_substrate <- rc_carib_substrate %>% group_by(reef_id,
year, date, segment_code, substrate_code) %>% mutate(perc_seg = (total/40) *
100)
# Note, if try to do % by segment and by survey at same time,
# the values are the same because of the way group_by is
# done-- this is why i separated this process into 2 steps
# B: Percent cover of each substrate code PER SURVEY (out of
# 160 points, or average of all the segments for that survey)
# Note: 8/20/18- Found that for some locations, there was
# more than 1 survey done on a particular day. To keep
# surveys separate, need to also group by 'depth'
rc_carib_substrate <- rc_carib_substrate %>% group_by(reef_id,
year, date, substrate_code, depth) %>% mutate(perc_cov = round(mean(perc_seg),
2), sd_cov = round(sd(perc_seg), 2), n_samples = length(segment_code)) #this will tell me how many segments were completed!
# C: Clean columns to be in a good format to bind rows to
# master_coral dataframe
rc_carib_to_bind <- rc_carib_substrate %>% # 1 find distinct surveys- for reef check these are the
# unique variables
distinct(reef_id, date, depth, substrate_code, .keep_all = TRUE) %>%
# 2 get rid of columns we no longer need
select(-segment_code, -total, -state_province_island, -city_town,
-perc_seg, -errors, -what_errors) %>%
# 3 filter out the 2 substrate codes of interest, hard coral
# (HC) & algae (NI)
filter(substrate_code == "HC" | substrate_code == "NI") %>%
# can't use spread on 2 cols, so combo % cov and sd,
# separated by _
unite(temp, perc_cov, sd_cov, sep = "_") %>%
# 4 use spread to put the % cover of each substrate code into
# its own column
spread(substrate_code, temp) %>%
# 5 break apart HC and NI data back into cover and sd
separate(HC, into = c("coral", "coral_std"), sep = "_", remove = TRUE) %>%
separate(NI, into = c("macro", "macro_std"), sep = "_", remove = TRUE) %>%
# 6 Unite lat and lon col with deg_min_sec together
unite(lat, latitude_degrees, latitude_minutes, latitude_seconds,
sep = " ") %>% unite(lon, longitude_degrees, longitude_minutes,
longitude_seconds, sep = " ") %>%
# 7 rename columns
rename(source_id = reef_id, depth_m = depth) %>% mutate(subregion = "caribbean",
method = "line_transect", orig_source = "reef_check", data_source = "reef_check",
t_length = 20) %>% separate(date, into = c("day", "month",
"year"), sep = "-", remove = TRUE)
# 8: Converting the lat and lon from deg_min_sec to dec_deg
# formats. Use measurements package to do the conversion:
rc_carib_to_bind$lat <- measurements::conv_unit(rc_carib_to_bind$lat,
"deg_min_sec", "dec_deg")
rc_carib_to_bind$lon <- measurements::conv_unit(rc_carib_to_bind$lon,
"deg_min_sec", "dec_deg")
# Make sure to put lat and lon as numeric
rc_carib_to_bind$lon <- as.numeric(rc_carib_to_bind$lon)
rc_carib_to_bind$lat <- as.numeric(rc_carib_to_bind$lat)
# 9: Turn year into 4 digits (note: this data is from
# 2006-2017 so we can just add 20 to the beginning of each
# element)
rc_carib_to_bind$year <- paste(20, rc_carib_to_bind$year, sep = "")
rc_carib_to_bind$day <- as.numeric(rc_carib_to_bind$day)
rc_carib_to_bind$year <- as.numeric(rc_carib_to_bind$year)
# make coral data numeric
rc_carib_to_bind$coral <- as.numeric(rc_carib_to_bind$coral)
rc_carib_to_bind$coral_std <- as.numeric(rc_carib_to_bind$coral_std)
rc_carib_to_bind$macro <- as.numeric(rc_carib_to_bind$macro)
rc_carib_to_bind$macro_std <- as.numeric(rc_carib_to_bind$macro_std)
Check Data
# Preview a sample of data:
rc_carib_to_bind[1:10, ] %>% arrange(reef_name) %>% kableExtra::kable(caption = "Coral and Macroalgae % Cover from Caribbean Reef Check Surveys") %>%
kable_styling(bootstrap_options = "condensed", full_width = F)
Coral and Macroalgae % Cover from Caribbean Reef Check Surveys
source_id
|
reef_name
|
lon
|
longitude_cardinal_direction
|
lat
|
latitude_cardinal_direction
|
depth_m
|
day
|
month
|
year
|
country
|
ocean
|
n_samples
|
coral
|
coral_std
|
macro
|
macro_std
|
subregion
|
method
|
orig_source
|
data_source
|
t_length
|
32.23.48.2W.3.50.5.01S
|
Buraco da Raquel
|
32.39672
|
W
|
3.834722
|
S
|
3.0
|
25
|
Sep
|
2008
|
Brazil
|
Atlantic
|
4
|
1.25
|
2.50
|
44.38
|
13.44
|
caribbean
|
line_transect
|
reef_check
|
reef_check
|
20
|
16.22.16W.15.14.21N
|
Champagne Outer
|
61.37111
|
W
|
15.239167
|
N
|
9.1
|
4
|
Mar
|
2006
|
Dominica
|
Atlantic
|
3
|
16.67
|
3.82
|
0.00
|
0.00
|
caribbean
|
line_transect
|
reef_check
|
reef_check
|
20
|
16.22.16W.15.14.21N
|
Champagne Outer
|
61.37111
|
W
|
15.239167
|
N
|
10.5
|
8
|
Apr
|
2006
|
Dominica
|
Atlantic
|
3
|
22.50
|
4.33
|
0.00
|
0.00
|
caribbean
|
line_transect
|
reef_check
|
reef_check
|
20
|
32.26.27.84W.3.50.38.57S
|
Laje Dois Irmaos Funda
|
32.44106
|
W
|
3.844056
|
S
|
21.0
|
9
|
Nov
|
2010
|
Brazil
|
Atlantic
|
4
|
20.62
|
8.98
|
36.25
|
7.77
|
caribbean
|
line_transect
|
reef_check
|
reef_check
|
20
|
32.26.30W.3.50.45S
|
Laje Dois Irmaos Funda
|
32.44167
|
W
|
3.845833
|
S
|
23.0
|
23
|
Nov
|
2009
|
Brazil
|
Atlantic
|
4
|
21.88
|
11.06
|
6.25
|
6.29
|
caribbean
|
line_transect
|
reef_check
|
reef_check
|
20
|
32.26.26.9W.3.50.49.09S
|
Laje Dois Irmaos Rasa
|
32.44081
|
W
|
3.846972
|
S
|
15.0
|
21
|
Sep
|
2008
|
Brazil
|
Atlantic
|
4
|
46.25
|
19.31
|
30.62
|
23.66
|
caribbean
|
line_transect
|
reef_check
|
reef_check
|
20
|
32.25.21.69W.3.52.9.17N
|
Sueste
|
32.42269
|
W
|
3.869222
|
S
|
3.0
|
13
|
Nov
|
2010
|
Brazil
|
Atlantic
|
4
|
0.62
|
1.25
|
59.38
|
12.48
|
caribbean
|
line_transect
|
reef_check
|
reef_check
|
20
|
32.25.21.77W.3.52.5.36S
|
Sueste
|
32.42272
|
W
|
3.868167
|
S
|
3.0
|
19
|
Sep
|
2008
|
Brazil
|
Atlantic
|
4
|
3.75
|
4.79
|
67.50
|
16.71
|
caribbean
|
line_transect
|
reef_check
|
reef_check
|
20
|
32.25.21.77W.3.52.5.36S
|
Sueste
|
32.42272
|
W
|
3.868167
|
S
|
1.6
|
20
|
Nov
|
2009
|
Brazil
|
Atlantic
|
4
|
3.75
|
4.79
|
58.12
|
10.87
|
caribbean
|
line_transect
|
reef_check
|
reef_check
|
20
|
32.25.30.5W.3.52.5.36S
|
Sueste Trilha
|
32.42514
|
W
|
3.868167
|
S
|
2.0
|
25
|
Sep
|
2008
|
Brazil
|
Atlantic
|
4
|
5.62
|
5.15
|
56.25
|
20.97
|
caribbean
|
line_transect
|
reef_check
|
reef_check
|
20
|