# Attach the tidyverse. library() stops with an error when the package is not
# installed, whereas require() only warns and returns FALSE, letting the script
# fail later with a less helpful "could not find function" message.
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
# dplyr is listed among the attached tidyverse packages above; it is loaded
# with library() here so a missing package is an immediate error.
library(dplyr)
# Show the working directory, because the data path below is relative to it.
getwd()
## [1] "C:/Users/emily/Documents/Toomey's R Seminar/BierbaumBIOL7263"
# Read the eBird observations into a tibble. The object is not referenced
# before this point: evaluating `MBT_ebird` earlier fails in a fresh session
# because it has not been created yet.
MBT_ebird <- read_csv("Data/MBT_ebird.csv")
## New names:
## Rows: 6595 Columns: 14
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (4): list_ID, common_name, scientific_name, location dbl (8): ...1, count,
## duration, latitude, longitude, count_tot, month, year date (1): date time (1):
## time
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
# Compact overview of every column: name, type and the first few values.
MBT_ebird %>%
  glimpse()
## Rows: 6,595
## Columns: 14
## $ ...1 <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,…
## $ list_ID <chr> "S40748758", "S33616660", "S33809874", "S35533959", "S…
## $ common_name <chr> "Snow Goose", "Snow Goose", "Snow Goose", "Snow Goose"…
## $ scientific_name <chr> "Anser caerulescens", "Anser caerulescens", "Anser cae…
## $ date <date> 2017-11-26, 2017-01-12, 2017-01-20, 2017-03-30, 2017-…
## $ time <time> 10:28:00, 07:00:00, 16:26:00, 07:05:00, 07:00:00, 18:…
## $ count <dbl> 16, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 26, 30, 6, 31,…
## $ duration <dbl> 20, 90, 59, 100, 127, 68, 109, 98, 173, 45, 118, 85, 1…
## $ location <chr> "US-MO", "US-MO", "US-MO", "US-MO", "US-MO", "US-MO", …
## $ latitude <dbl> 38.87193, 38.63891, 38.63891, 38.63891, 38.63891, 38.6…
## $ longitude <dbl> -90.18439, -90.28538, -90.28538, -90.28538, -90.28538,…
## $ count_tot <dbl> 369, 272, 188, 283, 369, 28, 247, 237, 137, 114, 108, …
## $ month <dbl> 11, 1, 1, 3, 4, 4, 4, 4, 4, 5, 6, 8, 1, 2, 12, 11, 11,…
## $ year <dbl> 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, …
# Sum the `count` column within each year.
bird_total <- MBT_ebird %>%
  group_by(year) %>%
  summarize(Bird_Num = sum(count))

# Print the yearly totals from the largest to the smallest.
bird_total %>%
  arrange(desc(Bird_Num))
## # A tibble: 13 × 2
## year Bird_Num
## <dbl> <dbl>
## 1 2014 9303
## 2 2020 8941
## 3 2017 6102
## 4 2021 3713
## 5 2015 3219
## 6 2018 1738
## 7 2022 1371
## 8 2004 1116
## 9 2019 677
## 10 2016 379
## 11 2013 151
## 12 2003 49
## 13 2009 25
# Tally the 2014 observation rows for each species; the result has one row per
# distinct `common_name`.
Species_of_Birds <- MBT_ebird %>%
  filter(year == 2014) %>%
  count(common_name)

# One row per species, so the number of rows is the number of species seen in
# 2014. (Passing the string "common_name" to count() groups by a constant and
# only returns this number by accident, under a misleading column name.)
nrow(Species_of_Birds)
## [1] 210
# Red-winged Blackbird rows only, totalled per `location`.
# NOTE(review): this sums `count_tot`, not `count` -- confirm that is the
# intended column for a per-species total.
RWBB <- MBT_ebird %>%
  filter(common_name == "Red-winged Blackbird") %>%
  group_by(location) %>%
  summarize(State_num = sum(count_tot))

# Print the locations ordered from the highest total to the lowest.
RWBB %>%
  arrange(desc(State_num))
## # A tibble: 5 × 2
## location State_num
## <chr> <dbl>
## 1 US-MO 8443
## 2 US-OK 6861
## 3 US-VT 391
## 4 US-FL 168
## 5 US-IL 30
# Keep rows whose `duration` lies between 5 and 200 (inclusive) and add
# `rate`, the row's `count` divided by its `duration`.
# The division is element-wise, so no grouping is required. The earlier
# group_by(list_ID) did not affect `rate`, and the ungroup() that followed was
# never assigned back, which left the stored table grouped.
Bird_Duration <- MBT_ebird %>%
  filter(duration >= 5 & duration <= 200) %>%
  mutate(rate = count / duration)

# Print the (ungrouped) table for inspection.
Bird_Duration
## # A tibble: 5,829 × 15
## ...1 list_ID commo…¹ scien…² date time count durat…³ locat…⁴ latit…⁵
## <dbl> <chr> <chr> <chr> <date> <tim> <dbl> <dbl> <chr> <dbl>
## 1 1 S407487… Snow G… Anser … 2017-11-26 10:28 16 20 US-MO 38.9
## 2 2 S336166… Snow G… Anser … 2017-01-12 07:00 1 90 US-MO 38.6
## 3 3 S338098… Snow G… Anser … 2017-01-20 16:26 1 59 US-MO 38.6
## 4 4 S355339… Snow G… Anser … 2017-03-30 07:05 1 100 US-MO 38.6
## 5 5 S356980… Snow G… Anser … 2017-04-04 07:00 1 127 US-MO 38.6
## 6 6 S358612… Snow G… Anser … 2017-04-10 18:06 1 68 US-MO 38.6
## 7 7 S359184… Snow G… Anser … 2017-04-13 06:59 1 109 US-MO 38.6
## 8 8 S361181… Snow G… Anser … 2017-04-20 07:00 1 98 US-MO 38.6
## 9 9 S361989… Snow G… Anser … 2017-04-23 08:13 1 173 US-MO 38.6
## 10 10 S368405… Snow G… Anser … 2017-05-13 18:00 1 45 US-MO 38.6
## # … with 5,819 more rows, 5 more variables: longitude <dbl>, count_tot <dbl>,
## # month <dbl>, year <dbl>, rate <dbl>, and abbreviated variable names
## # ¹common_name, ²scientific_name, ³duration, ⁴location, ⁵latitude
# Regroup the rate table by year (this replaces any existing grouping) ...
Bird_Duration <- Bird_Duration %>%
  group_by(year)

# ... and print the mean `rate` for each year.
Bird_Duration %>%
  summarize(mean_value = mean(rate))
## # A tibble: 13 × 2
## year mean_value
## <dbl> <dbl>
## 1 2003 0.0185
## 2 2004 0.0777
## 3 2009 0.0521
## 4 2013 0.0390
## 5 2014 0.0978
## 6 2015 0.123
## 7 2016 0.0483
## 8 2017 0.0847
## 9 2018 0.0595
## 10 2019 0.0311
## 11 2020 0.542
## 12 2021 0.0782
## 13 2022 0.218
# For every species, attach `frequency` (the sum of `count` over all of its
# rows) and keep a single representative row. `frequency` is constant within a
# species, so which.max() selects the first row of each group.
Top_10_Birds <- MBT_ebird %>%
  group_by(common_name) %>%
  mutate(frequency = sum(count)) %>%
  slice(which.max(frequency))

# The ten species with the largest totals, highest first. The table is
# ungrouped so later steps are not silently applied per group. The former
# group_by(scientific_name, frequency) + filter(frequency == max(frequency))
# step compared each one-valued group with itself and never removed a row, so
# it is dropped.
Top_10 <- Top_10_Birds %>%
  ungroup() %>%
  arrange(desc(frequency)) %>%
  head(10)

# write_csv() does not create missing directories; make sure the output folder
# exists (no-op, without a warning, when it is already there).
dir.create("Results", showWarnings = FALSE)
write_csv(Top_10, "Results/Top_10_Birds.csv")