# List of built-in data sets ----
data()

# The R Datasets Package ----
help(package = "datasets")

# List the data sets in all *available* packages ----
data(package = .packages(all.available = TRUE))

# List data sets included in the ggplot2 package ----
data(package = "ggplot2")

# Describe data set "movies" (from "ggplot2movies") with psych::describe() ----
library(ggplot2movies) # provides `movies`; describe(movies) fails without it
library(psych)
describe(movies)

# Draw a pie chart using different colour schemes ----
library(ggplot2)
# Plot 4 pie charts in one 2 x 2 diagram. par() returns the previous settings,
# which are kept so the layout can be restored once the charts are drawn.
old_par <- par(mfrow = c(2, 2))
pie(rep(1, 8), col = FALSE, main = "Blank Pie") # no colours shown
pie(rep(1, 8), main = "Default Colours")
# One colour per slice: with fewer colours than slices the palette is recycled
# and the last slice gets the same colour as the first.
pie(1:8, col = heat.colors(8), main = "Heat Colours")
pie(rep(1, 8), col = rainbow(8), main = "Rainbow Colours")
par(old_par) # back to the previous plot layout

# EDA ----
# Exploratory look at the movies data: load the packages, then print the
# dimensions, summaries, structure and first rows in turn.
library(ggplot2)
library(ggplot2movies)
library(psych)
data("movies") # using movies data
dim(movies) # number of rows and columns
summary(movies) # per-column summary
str(movies) # compact structure: column names and types
head(movies) # first rows
describe(movies) # descriptive statistics; presumably psych::describe() -- confirm
movies$rating # list values of rating column (variable)
# Plot a histogram of ratings and length for a random sample of 1000 movies
library(ggplot2)
library(ggplot2movies)
library(ggpubr) # for ggarrange()

# Random sample of 1000 rows; no seed is set, so the sample differs per run.
movies_1000_rows <- movies[sample(nrow(movies), 1000), ]

# qplot() is deprecated since ggplot2 3.4.0, so the histograms are built with
# ggplot() + geom_histogram(); ggtitle() replaces qplot's `main` argument.
rating_htg <- ggplot(movies_1000_rows, aes(x = rating)) +
  geom_histogram() +
  ggtitle("Movie Ratings")
length_htg <- ggplot(movies_1000_rows, aes(x = length)) +
  geom_histogram() +
  ggtitle("Movie Length")

# Display two histograms in the same figure
ggarrange(rating_htg, length_htg, labels = c("A", "B"), ncol = 2, nrow = 1)

# Working with Data frames ----
movies$unused_column <- NULL # remove column unused_column
medianRating <- median(movies$rating)

# Add a label column: "Larger" when the rating is above the median,
# "Smaller" otherwise (ratings equal to the median count as "Smaller").
movies$median_rating_col <- ifelse(
  movies$rating > medianRating, "Larger", "Smaller"
)

# Or, to replace the rating column itself with "Larger" or "Smaller" values:
movies$rating <- with(
  movies,
  ifelse(rating > medianRating, "Larger", "Smaller")
)

# Summarise a data frame by groups in R using dplyr package ----
library(ggplot2movies)
library(dplyr) # for group_by(), select(), summarise()

# For each year: the average movie length and the number of movies produced.
mymovies <- movies %>%
  select(year, length, title) %>%
  group_by(year) %>%
  summarise(avg_length = mean(length), number_of_titles = n())

# Using apply function ----
# apply() converts a data frame to a matrix first. If any column is
# non-numeric (such as a text title column) the matrix becomes character and
# every mean comes back NA, so only the numeric columns are passed in.
apply(movies[vapply(movies, is.numeric, logical(1))], 2, mean)

# Find all functions whose names end in "apply" ----
# apropos() takes a regular expression; `$` anchors the match at the end.
apropos("apply$")
Friday, January 10, 2020
Working with R language Day 01
Subscribe to:
Posts (Atom)
Mounting USB drives in Windows Subsystem for Linux
Windows Subsystem for Linux can use (mount): SD card USB drives CD drives (CDFS) Network drives UNC paths Local storage / drives Drives form...
-
I. Five different ways to answer a question II. Use SOLO strategies to explain our thinking and reasoning III. SOLO Taxono...
-
Learning levels 1, 2, and 3 Learning levels 4, 5, and 6 References http://www.cccs.edu/Docs/Foundation/SUN/QUESTIONS%20FOR%20TH...