Assignment 4

Load the required R packages for this assignment
# Attach the tidyverse; library() stops with an error if the package is
# missing, whereas require() only returns FALSE and lets the script continue.
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6      ✔ purrr   0.3.4 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.0 
## ✔ readr   2.1.2      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
# dplyr is already attached by the tidyverse call above; kept as an explicit
# dependency, but loaded with library() so a missing package fails loudly.
library(dplyr)
Import the csv file of interest MBT_ebird
Note: Make sure you are in the correct working directory. If your file does not load, use this to check your working directory:
# Show the directory that relative file paths are resolved against.
print(getwd())
## [1] "C:/Users/emily/Documents/Toomey's R Seminar/BierbaumBIOL7263"
Import the MBT_ebird csv file, which is located in your Data folder
# Read the eBird csv (path is relative to the working directory) into a tibble.
MBT_ebird <- read_csv(file = "Data/MBT_ebird.csv")
## New names:
## Rows: 6595 Columns: 14
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (4): list_ID, common_name, scientific_name, location dbl (8): ...1, count,
## duration, latitude, longitude, count_tot, month, year date (1): date time (1):
## time
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
Examine the csv file
# Print a compact, column-by-column overview of the imported data.
MBT_ebird %>% glimpse()
## Rows: 6,595
## Columns: 14
## $ ...1            <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,…
## $ list_ID         <chr> "S40748758", "S33616660", "S33809874", "S35533959", "S…
## $ common_name     <chr> "Snow Goose", "Snow Goose", "Snow Goose", "Snow Goose"…
## $ scientific_name <chr> "Anser caerulescens", "Anser caerulescens", "Anser cae…
## $ date            <date> 2017-11-26, 2017-01-12, 2017-01-20, 2017-03-30, 2017-…
## $ time            <time> 10:28:00, 07:00:00, 16:26:00, 07:05:00, 07:00:00, 18:…
## $ count           <dbl> 16, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 26, 30, 6, 31,…
## $ duration        <dbl> 20, 90, 59, 100, 127, 68, 109, 98, 173, 45, 118, 85, 1…
## $ location        <chr> "US-MO", "US-MO", "US-MO", "US-MO", "US-MO", "US-MO", …
## $ latitude        <dbl> 38.87193, 38.63891, 38.63891, 38.63891, 38.63891, 38.6…
## $ longitude       <dbl> -90.18439, -90.28538, -90.28538, -90.28538, -90.28538,…
## $ count_tot       <dbl> 369, 272, 188, 283, 369, 28, 247, 237, 137, 114, 108, …
## $ month           <dbl> 11, 1, 1, 3, 4, 4, 4, 4, 4, 5, 6, 8, 1, 2, 12, 11, 11,…
## $ year            <dbl> 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, …

Problem 1: In which year did he observe the most individual birds? How many?

# Problem 1: total number of individual birds recorded in each year.
bird_total <- MBT_ebird %>%
  group_by(year) %>%
  summarize(Bird_Num = sum(count))

# Sort years from most to fewest individuals. arrange() has no `by` argument;
# the ordering expression is passed directly.
arrange(bird_total, desc(Bird_Num))
## # A tibble: 13 × 2
##     year Bird_Num
##    <dbl>    <dbl>
##  1  2014     9303
##  2  2020     8941
##  3  2017     6102
##  4  2021     3713
##  5  2015     3219
##  6  2018     1738
##  7  2022     1371
##  8  2004     1116
##  9  2019      677
## 10  2016      379
## 11  2013      151
## 12  2003       49
## 13  2009       25

ANSWER: 2014; 9303 individuals

Problem 2: In 2014, how many different species of birds did he observe?

# Problem 2: one row per species recorded in 2014 (n = number of records for
# that species), so the number of rows equals the number of species.
Species_of_Birds <- MBT_ebird %>%
  filter(year == 2014) %>%
  count(common_name)

# Count the rows directly. count(Species_of_Birds, "common_name") only gave
# this number by lumping every row under one constant string.
nrow(Species_of_Birds)
## [1] 210

ANSWER: 210

Problem 3: In which state did he most frequently observe Red-winged Blackbirds?

# Problem 3: how often Red-winged Blackbirds were recorded in each state.
# "Most frequently" is answered by tallying the records per state; the earlier
# version summed count_tot, which looks like a per-checklist total rather than
# a Red-winged Blackbird frequency (TODO confirm the meaning of count_tot).
RWBB <- MBT_ebird %>%
  filter(common_name == "Red-winged Blackbird") %>%
  count(location, name = "State_num")

# NOTE(review): the table and ANSWER printed after this chunk were produced by
# the earlier sum(count_tot) version; re-run and confirm them.
arrange(RWBB, desc(State_num))
## # A tibble: 5 × 2
##   location State_num
##   <chr>        <dbl>
## 1 US-MO         8443
## 2 US-OK         6861
## 3 US-VT          391
## 4 US-FL          168
## 5 US-IL           30

ANSWER: Missouri

Problem 4: Filter observations for a duration between 5 and 200 minutes. Calculate the mean rate per checklist at which he encountered species each year. To do this, calculate the number of species in each checklist divided by its duration, and then take the mean for each year.

# Problem 4: species-encounter rate per checklist, averaged within each year.

# Keep only observations from checklists lasting 5 to 200 minutes (inclusive).
Bird_Duration <- MBT_ebird %>%
  filter(duration >= 5, duration <= 200)

# Create the new variable rate: one row per checklist, holding the number of
# distinct species on that checklist divided by its duration. The earlier
# version divided each row's individual count by duration, which measures
# individuals per minute per record rather than species per checklist.
# Assumes duration is constant within a list_ID - TODO confirm.
checklist_rate <- Bird_Duration %>%
  group_by(year, list_ID) %>%
  summarize(
    rate = n_distinct(common_name) / first(duration),
    .groups = "drop"
  )

# Mean checklist rate for each year.
# NOTE(review): the yearly table printed after this chunk was produced by the
# earlier per-row count / duration calculation; re-run to refresh it.
checklist_rate %>%
  group_by(year) %>%
  summarize(mean_value = mean(rate))
## # A tibble: 13 × 2
##     year mean_value
##    <dbl>      <dbl>
##  1  2003     0.0185
##  2  2004     0.0777
##  3  2009     0.0521
##  4  2013     0.0390
##  5  2014     0.0978
##  6  2015     0.123 
##  7  2016     0.0483
##  8  2017     0.0847
##  9  2018     0.0595
## 10  2019     0.0311
## 11  2020     0.542 
## 12  2021     0.0782
## 13  2022     0.218