# Interactive scatterplots of personal vs. IMDb movie ratings.
#
# Steps: load the ratings export, keep feature films, jitter both rating
# vectors, collapse each film's genre list to one "short" genre, recode the
# release year into display groups, and write four clickme HTML plots.

# Load the data ----
# (or point this at your own exported ratings file)
d <- read.csv("http://www.dimiter.eu/Visualizations_files/imdb/ratings.csv")
d <- subset(d, Title.type == "Feature Film") # keep feature films only

# Variable transformations ----
# Jitter the ratings so that overlapping dots stay visible in the plot.
mine.j <- jitter(d$You.rated, amount = 0.3)
imdb.j <- jitter(d$IMDb.Rating, amount = 0.05)

# Assign a single genre to each movie ----
# `Genres` holds a comma-separated list per film. Split it once and strip the
# spaces so that membership can be tested by exact genre name.
genres_list <- strsplit(as.character(d$Genres), ",", fixed = TRUE)
genres_list <- lapply(genres_list, function(g) gsub(" ", "", g, fixed = TRUE))
d$Genre.N <- vapply(genres_list, length, integer(1))

# Genres in priority order: a film gets the FIRST genre of this vector that
# appears in its genre list, and "other" when none of them does.
genre_priority <- c(
  "adventure", "sci_fi", "biography", "mystery", "thriller",
  "crime", "drama", "comedy", "romance", "action"
)

short_genre <- rep("other", nrow(d))
for (genre in genre_priority) {
  has_genre <- vapply(genres_list, function(g) genre %in% g, logical(1))
  # 0/1 indicator column named exactly after the genre (no padding, so no
  # reliance on partial matching by `$`).
  d[[genre]] <- as.numeric(has_genre)
  # Only films not yet claimed by a higher-priority genre are labelled.
  unassigned <- short_genre == "other"
  short_genre[has_genre & unassigned] <- genre
}
d$short.genre <- short_genre

# Recode the years ----
# Left-closed intervals: < 1970, [1970, 1980), ..., [2014, 2015).
# Years from 2015 onwards fall outside the breaks and become NA.
year_breaks <- c(-Inf, 1970, 1980, 1990, 2000, 2010, 2011, 2012, 2013, 2014, 2015)
year_labels <- c(
  "1960s and before", "1970s", "1980s", "1990s", "2000s",
  "2010", "2011", "2012", "2013", "2014"
)
d$year <- droplevels(
  cut(d$Year, breaks = year_breaks, labels = year_labels, right = FALSE)
)

# Get the visualization library ----
# Install from GitHub only when the package is missing.
if (!requireNamespace("clickme", quietly = TRUE)) {
  devtools::install_github("nachocab/clickme")
}
library(clickme)

# Draw one ratings scatterplot and write it to `file`.
#
# color_groups: vector (one entry per film) used to colour and filter dots.
# file:         name of the HTML file to write.
# height/width: plot size passed on to clickme.
# ...:          further clickme arguments, e.g. title and subtitle.
#
# NOTE(review): the original calls passed `formats` twice (x = ".1f" first,
# x = ".0f" second), which R rejects as a duplicated argument. The first
# value is kept here; confirm that one decimal on the IMDb axis is intended.
# NOTE(review): x and y are already jittered above and `jitter = 0.4` is
# passed as well, as in the original; confirm both are wanted.
plot_ratings <- function(color_groups, file, height, width, ...) {
  clickme(points,
    x = imdb.j, y = mine.j,
    ...,
    formats = list(x = ".1f", y = ".0f"),
    opacity = .8,
    jitter = 0.4,
    radius = 5,
    height = height, width = width,
    xlab = "Internet Movie Database (IMDb) rating",
    ylab = "My rating",
    names = d$Title,
    color_groups = color_groups,
    file = file
  )
}

# Big plot with title (year)
plot_ratings(
  color_groups = d$year, file = "imdb_dots_9y.html",
  height = 400, width = 900,
  title = "Scatterplot of general and personal movie ratings",
  subtitle = "(hover over a dot to reveal the title. filter by year from the menu on the right)"
)

# Big plot with title (genre)
plot_ratings(
  color_groups = d$short.genre, file = "imdb_dots_9.html",
  height = 400, width = 900,
  title = "Scatterplot of general and personal movie ratings",
  subtitle = "(hover over a dot to reveal the title. filter by genre from the menu on the right)"
)

# Mid-size plot without title (genre)
plot_ratings(
  color_groups = d$short.genre, file = "imdb_dots_6.html",
  height = 420, width = 600,
  title = ""
)

# Mid-size per decade plot without title (year)
plot_ratings(
  color_groups = d$year, file = "imdb_dots_6y.html",
  height = 420, width = 600,
  title = ""
)