- Getting packages and data. iris is a data frame with 150 rows and
five variables (columns).
library(tidyverse)
data(iris)
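- Creating iris1 by keeping only the rows that meet the desired
criteria: Species is virginica or versicolor, Sepal.Width is greater
than 2.5, and Sepal.Length is greater than 6.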
# Keep only virginica/versicolor rows with Sepal.Width above 2.5 and
# Sepal.Length above 6; comma-separated conditions are combined with AND.
iris1 <- iris %>%
  filter(
    Species %in% c("virginica", "versicolor"),
    Sepal.Width > 2.5,
    Sepal.Length > 6
  )
- Creating iris2, which keeps only the desired columns (Species,
Sepal.Length, and Sepal.Width), using the select function.
# Narrow iris1 down to the species label and the two sepal measurements.
iris2 <- iris1 %>%
  select(Species, Sepal.Length, Sepal.Width)
- Orders the data by descending sepal length.
# Sort rows from the largest Sepal.Length to the smallest.
# arrange() has no `by` argument (its signature is
# arrange(.data, ..., .by_group = FALSE)), so the sort key is passed
# positionally instead of as a misleading `by = ...` entry in `...`.
iris3 <- arrange(iris2, desc(Sepal.Length))
- Adding a sepal area (L x W) to the previous data frame. Uses mutate
to provide the formula and create the new column simultaneously.
# Append a Sepal.Area column computed as Sepal.Length times Sepal.Width.
iris4 <- iris3 %>%
  mutate(Sepal.Area = Sepal.Length * Sepal.Width)
- Creates a small data frame that calculates averages.
# Collapse iris4 into a single row holding the mean sepal length, the mean
# sepal width, and the number of rows that went into those means.
iris5 <- iris4 %>%
  summarize(
    average.sepal.length = mean(Sepal.Length),
    average.sepal.width = mean(Sepal.Width),
    sample.size = n()
  )
print(iris5)
## average.sepal.length average.sepal.width sample.size
## 1 6.698214 3.041071 56
- Now we do averages by species.
# Same summary as before, but computed separately for each Species:
# group first, then summarize once per group.
iris6 <- iris4 %>%
  group_by(Species) %>%
  summarize(
    average.sepal.length = mean(Sepal.Length),
    average.sepal.width = mean(Sepal.Width),
    sample.size = n()
  )
print(iris6)
## # A tibble: 2 × 4
## Species average.sepal.length average.sepal.width sample.size
## <fct> <dbl> <dbl> <int>
## 1 versicolor 6.48 2.99 17
## 2 virginica 6.79 3.06 39
- This chunk shows how chaining the same steps with pipe statements
shortens the code by avoiding the intermediate data frames.
# Run the whole workflow as one pipeline: filter rows, pick columns, sort,
# add Sepal.Area, then summarize per Species.
irisFinal <- iris %>%
  filter(
    Species %in% c("virginica", "versicolor"),
    Sepal.Width > 2.5,
    Sepal.Length > 6
  ) %>%
  select(Species, Sepal.Length, Sepal.Width) %>%
  # arrange() has no `by` argument; the sort key is passed positionally.
  arrange(desc(Sepal.Length)) %>%
  mutate(Sepal.Area = Sepal.Length * Sepal.Width) %>%
  group_by(Species) %>%
  summarize(
    average.sepal.length = mean(Sepal.Length),
    average.sepal.width = mean(Sepal.Width),
    sample.size = n()
  )
# Element-wise comparison against iris6; every cell should be TRUE.
irisFinal == iris6 # checking my work
## Species average.sepal.length average.sepal.width sample.size
## [1,] TRUE TRUE TRUE TRUE
## [2,] TRUE TRUE TRUE TRUE
- Formatting the original iris data frame “the long way” (one row per
measurement) using the pivot_longer function.
# Reshape iris to long format: Species stays as the identifier column and
# the four measurement columns are stacked into Measure (the former column
# name) and Value (the measurement itself).
irislonger <- iris %>%
  select(Species, Sepal.Length, Sepal.Width, Petal.Length, Petal.Width) %>%
  pivot_longer(
    cols = -Species,
    names_to = "Measure",
    values_to = "Value"
  )