1. Getting packages and data. iris is a data frame with 150 rows and five variables (columns).
library(tidyverse)
# Load the built-in iris data set into the workspace.
data("iris")
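  1. Creating iris1 by keeping only the rows that meet the desired criteria: species is virginica or versicolor, sepal width is greater than 2.5, and sepal length is greater than 6.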
# Keep only virginica/versicolor rows with sepal width above 2.5 and sepal
# length above 6; a row must satisfy every condition passed to filter().
iris1 <- iris %>%
  filter(
    Species %in% c("virginica", "versicolor"),
    Sepal.Width > 2.5,
    Sepal.Length > 6
  )
  1. Creating iris2, which keeps only the desired columns (Species, Sepal.Length, Sepal.Width), using the select function.
# Narrow iris1 down to the species label and the two sepal measurements.
iris2 <- iris1 %>%
  select(Species, Sepal.Length, Sepal.Width)
  1. Orders the data by descending sepal length.
# Sort rows from the longest sepal to the shortest. arrange() has no `by`
# argument, so the sort expression is passed positionally.
iris3 <- arrange(iris2, desc(Sepal.Length))
  1. Adding a sepal area (L x W) to the previous data frame. Uses mutate to provide the formula and create the new column simultaneously.
# Append Sepal.Area (sepal length times sepal width) as a new column.
iris4 <- iris3 %>%
  mutate(Sepal.Area = Sepal.Length * Sepal.Width)
  1. Creates a small data frame that calculates averages.
# Collapse iris4 to a single row: mean sepal length, mean sepal width, and
# the number of rows.
iris5 <- iris4 %>%
  summarize(
    average.sepal.length = mean(Sepal.Length),
    average.sepal.width = mean(Sepal.Width),
    sample.size = n()
  )
print(iris5)
##   average.sepal.length average.sepal.width sample.size
## 1             6.698214            3.041071          56
  1. Now we do averages by species.
# Per-species summary: group iris4 by Species, then compute the mean sepal
# length, mean sepal width, and row count within each group.
iris6 <- iris4 %>%
  group_by(Species) %>%
  summarize(
    average.sepal.length = mean(Sepal.Length),
    average.sepal.width = mean(Sepal.Width),
    sample.size = n()
  )
print(iris6)
## # A tibble: 2 × 4
##   Species    average.sepal.length average.sepal.width sample.size
##   <fct>                     <dbl>               <dbl>       <int>
## 1 versicolor                 6.48                2.99          17
## 2 virginica                  6.79                3.06          39
  1. This chunk shows how chaining the steps with pipe statements shortens the code: the same result is produced without creating the intermediate data frames.
# One pipeline from the raw iris data to the per-species summary; no
# intermediate data frames are stored along the way.
irisFinal <- iris %>%
  filter(
    Species %in% c("virginica", "versicolor"),
    Sepal.Width > 2.5,
    Sepal.Length > 6
  ) %>%
  select(Species, Sepal.Length, Sepal.Width) %>%
  # arrange() has no `by` argument; the sort key is passed positionally.
  arrange(desc(Sepal.Length)) %>%
  mutate(Sepal.Area = Sepal.Length * Sepal.Width) %>%
  group_by(Species) %>%
  summarize(
    average.sepal.length = mean(Sepal.Length),
    average.sepal.width = mean(Sepal.Width),
    sample.size = n()
  )

# Checking my work: element-by-element comparison with the stepwise result.
irisFinal == iris6
##      Species average.sepal.length average.sepal.width sample.size
## [1,]    TRUE                 TRUE                TRUE        TRUE
## [2,]    TRUE                 TRUE                TRUE        TRUE
  1. Formatting the original iris data frame into “long” format using the pivot functions.
# Reshape iris to long format: every column except Species is stacked, with
# the original column name stored in Measure and its number in Value.
irislonger <- iris %>%
  select(Species, Sepal.Length, Sepal.Width, Petal.Length, Petal.Width) %>%
  pivot_longer(cols = !Species, names_to = "Measure", values_to = "Value")