Directions

This is the Way

Data for demo

Back to the spellbook

1. Load data

Load and check.

# Import the planets table; the first line of the CSV supplies column names.
star_wars_planets <- read.csv(
  file = "star_wars_planets_v5.csv",
  header = TRUE
)
# Preview the first rows to confirm the import looks sensible.
head(x = star_wars_planets)
##       Name                Region         Sector        System Inhabitants
## 1 Aargonar               Unknown        Unknown       Unknown     Unknown
## 2   Abafar Outer Rim Territories Sprizen sector       Unknown     Unknown
## 3 Abednedo              Colonies        Unknown       Unknown    Abednedo
## 4   Absanz               Unknown        Unknown       Unknown     Unknown
## 5  Affadar               Unknown        Unknown       Unknown     T'Laeem
## 6   Agamar Outer Rim Territories        Unknown Agamar system     Unknown
##   Capital_City Population Record_High_Temp Record_Low_Temp Gravity Key_Element
## 1      Unknown    3857305               36              -5    8.35           6
## 2      Unknown    2331308               33             -27   11.40          28
## 3      Unknown    5358387               28              -5    9.55          30
## 4      Unknown    7229751               38              -6   10.80          57
## 5      Unknown    4307513               37             -31    9.75          22
## 6      Unknown    6191940               38              -2    9.00          34
##   Rotation Revolution Rating Rating_Nom Resources Resources_Nom
## 1    59.89       2112      7        Yay         8       Awesome
## 2    49.20       1818      2      Yucks         9       Awesome
## 3   127.76       1210      3      Yucks         5          Blah
## 4   145.98       1431      9        Yay         3          Blah
## 5    58.51       1060      9        Yay         7       Awesome
## 6   148.23       1450      5        Meh         4          Blah
# Per-column summary of the imported table.
summary(star_wars_planets)
##      Name              Region             Sector             System         
##  Length:683         Length:683         Length:683         Length:683        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  Inhabitants        Capital_City         Population      Record_High_Temp
##  Length:683         Length:683         Min.   :1014877   Min.   :  0.00  
##  Class :character   Class :character   1st Qu.:3226102   1st Qu.: 10.00  
##  Mode  :character   Mode  :character   Median :5458429   Median : 21.00  
##                                        Mean   :5522815   Mean   : 20.96  
##                                        3rd Qu.:7777546   3rd Qu.: 31.00  
##                                        Max.   :9973030   Max.   :100.00  
##  Record_Low_Temp     Gravity        Key_Element        Rotation     
##  Min.   :-60.00   Min.   : 7.000   Min.   :  1.00   Min.   : 10.07  
##  1st Qu.:-30.00   1st Qu.: 8.280   1st Qu.: 28.00   1st Qu.: 64.32  
##  Median :-20.00   Median : 9.570   Median : 55.00   Median :125.10  
##  Mean   :-19.57   Mean   : 9.542   Mean   : 53.62   Mean   :127.44  
##  3rd Qu.: -9.00   3rd Qu.:10.805   3rd Qu.: 80.00   3rd Qu.:194.54  
##  Max.   : 30.00   Max.   :12.000   Max.   :105.00   Max.   :249.23  
##    Revolution         Rating       Rating_Nom          Resources    
##  Min.   : 100.0   Min.   :1.000   Length:683         Min.   :1.000  
##  1st Qu.: 740.5   1st Qu.:3.000   Class :character   1st Qu.:3.000  
##  Median :1315.0   Median :5.000   Mode  :character   Median :5.000  
##  Mean   :1334.5   Mean   :5.081                      Mean   :5.152  
##  3rd Qu.:1961.0   3rd Qu.:7.000                      3rd Qu.:7.000  
##  Max.   :2499.0   Max.   :9.000                      Max.   :9.000  
##  Resources_Nom     
##  Length:683        
##  Class :character  
##  Mode  :character  
##                    
##                    
## 
# Count the rows in the imported table.
nrow(star_wars_planets)
## [1] 683
# Show each column's storage type and its first few values.
str(star_wars_planets)
## 'data.frame':    683 obs. of  17 variables:
##  $ Name            : chr  "Aargonar" "Abafar" "Abednedo" "Absanz" ...
##  $ Region          : chr  "Unknown" "Outer Rim Territories" "Colonies" "Unknown" ...
##  $ Sector          : chr  "Unknown" "Sprizen sector" "Unknown" "Unknown" ...
##  $ System          : chr  "Unknown" "Unknown" "Unknown" "Unknown" ...
##  $ Inhabitants     : chr  "Unknown" "Unknown" "Abednedo" "Unknown" ...
##  $ Capital_City    : chr  "Unknown" "Unknown" "Unknown" "Unknown" ...
##  $ Population      : int  3857305 2331308 5358387 7229751 4307513 6191940 9622189 4432983 1125034 6244208 ...
##  $ Record_High_Temp: int  36 33 28 38 37 38 29 39 33 40 ...
##  $ Record_Low_Temp : int  -5 -27 -5 -6 -31 -2 -35 -6 -16 -15 ...
##  $ Gravity         : num  8.35 11.4 9.55 10.8 9.75 ...
##  $ Key_Element     : int  6 28 30 57 22 34 105 57 37 74 ...
##  $ Rotation        : num  59.9 49.2 127.8 146 58.5 ...
##  $ Revolution      : int  2112 1818 1210 1431 1060 1450 2245 440 494 1999 ...
##  $ Rating          : int  7 2 3 9 9 5 5 4 8 2 ...
##  $ Rating_Nom      : chr  "Yay" "Yucks" "Yucks" "Yay" ...
##  $ Resources       : int  8 9 5 3 7 4 5 8 4 4 ...
##  $ Resources_Nom   : chr  "Awesome" "Awesome" "Blah" "Blah" ...
# List the column names in positional order.
names(star_wars_planets)
##  [1] "Name"             "Region"           "Sector"           "System"          
##  [5] "Inhabitants"      "Capital_City"     "Population"       "Record_High_Temp"
##  [9] "Record_Low_Temp"  "Gravity"          "Key_Element"      "Rotation"        
## [13] "Revolution"       "Rating"           "Rating_Nom"       "Resources"       
## [17] "Resources_Nom"

2. Filter variables for clustering

Take the variables needed.

# Keep only the three numeric variables used for clustering. Columns are
# selected by name rather than by position (8, 9, 13) so the selection stays
# correct if the CSV's column order changes; the previous `subset()` wrapper
# had no condition and therefore did nothing.
star_wars_planets_filter <- star_wars_planets[
  , c("Record_High_Temp", "Record_Low_Temp", "Revolution"), drop = FALSE
]
names(star_wars_planets_filter)
## [1] "Record_High_Temp" "Record_Low_Temp"  "Revolution"
# Confirm the filtered table's columns and their types.
str(star_wars_planets_filter)
## 'data.frame':    683 obs. of  3 variables:
##  $ Record_High_Temp: int  36 33 28 38 37 38 29 39 33 40 ...
##  $ Record_Low_Temp : int  -5 -27 -5 -6 -31 -2 -35 -6 -16 -15 ...
##  $ Revolution      : int  2112 1818 1210 1431 1060 1450 2245 440 494 1999 ...

3. Data exploration

Explore the worlds.

library(ggplot2)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Box plot of the record high temperature.
ggplot(
  data = star_wars_planets_filter,
  mapping = aes(y = Record_High_Temp)
) +
  geom_boxplot(fill = "#C6C6FF") +
  labs(title = "Record high temperature", y = "High Temperature") +
  theme_classic()

# Histogram of the record high temperature. The title previously contained a
# stray word ("size"); it now matches the wording of the other histograms.
ggplot(star_wars_planets_filter) + aes(Record_High_Temp) +
  geom_histogram(fill = "#C6C6FF") + ylab("Count") +
  ggtitle("Histogram of record high temperature") + theme_classic()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Numeric summary of the record high temperature column.
summary(star_wars_planets_filter$Record_High_Temp)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00   10.00   21.00   20.96   31.00  100.00
# Box plot of the record low temperature.
ggplot(
  data = star_wars_planets_filter,
  mapping = aes(y = Record_Low_Temp)
) +
  geom_boxplot(fill = "#C6C6CD") +
  labs(title = "Record low temperature", y = "Low temperature") +
  theme_dark()

# Histogram of the record low temperature (default binning).
ggplot(
  data = star_wars_planets_filter,
  mapping = aes(Record_Low_Temp)
) +
  geom_histogram(fill = "#C6C6CD") +
  labs(title = "Histogram of record low temperature", y = "Count") +
  theme_dark()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Numeric summary of the record low temperature column.
summary(star_wars_planets_filter$Record_Low_Temp)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  -60.00  -30.00  -20.00  -19.57   -9.00   30.00
# Box plot of the Revolution column (titled "Length of year").
ggplot(
  data = star_wars_planets_filter,
  mapping = aes(y = Revolution)
) +
  geom_boxplot(fill = "#C6C666") +
  labs(title = "Length of year", y = "Revolution") +
  theme_linedraw()

# Histogram of the Revolution column (default binning).
ggplot(
  data = star_wars_planets_filter,
  mapping = aes(Revolution)
) +
  geom_histogram(fill = "#C6C666") +
  labs(title = "Histogram of length of year", y = "Count") +
  theme_linedraw()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Numeric summary of the Revolution column.
summary(star_wars_planets_filter$Revolution)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   100.0   740.5  1315.0  1334.5  1961.0  2499.0

4. Normalise data

Normalise.

# Standardise every clustering variable (centre to mean 0, scale to sd 1) so
# that Revolution, which spans thousands, does not dominate the distances
# k-means works with. `scale()` handles all columns in one call and always
# returns a numeric matrix, unlike `sapply()`, whose return shape depends on
# how its results happen to simplify.
star_wars_planets_filter_norm <- scale(star_wars_planets_filter)
head(star_wars_planets_filter_norm)
##      Record_High_Temp Record_Low_Temp Revolution
## [1,]        1.2242449       1.1579100  1.1162594
## [2,]        0.9800395      -0.5900060  0.6941853
## [3,]        0.5730305       1.1579100 -0.1786756
## [4,]        1.3870485       1.0784592  0.1385979
## [5,]        1.3056467      -0.9078089 -0.3940195
## [6,]        1.3870485       1.3962621  0.1658748

5. k-means using k = 3

Using k = 3

# Fit k-means with k = 3 on the standardised variables.
# k-means starts from randomly chosen centres, so the seed is fixed to make
# the cluster labels (and every table/plot derived from them) reproducible,
# and several random starts are tried so the best of them is kept rather
# than whatever a single start happens to converge to.
set.seed(123)
star_wars_planets_filter_norm_kmeans_1 <- kmeans(
  star_wars_planets_filter_norm,
  centers = 3,
  nstart = 25
)

# Inspect the components of the fitted object (assignments, centres, sizes).
str(star_wars_planets_filter_norm_kmeans_1)
## List of 9
##  $ cluster     : int [1:683] 1 2 1 1 3 1 2 1 3 1 ...
##  $ centers     : num [1:3, 1:3] 0.533 -0.191 -0.295 1.045 -0.645 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:3] "1" "2" "3"
##   .. ..$ : chr [1:3] "Record_High_Temp" "Record_Low_Temp" "Revolution"
##  $ totss       : num 2046
##  $ withinss    : num [1:3] 398 352 437
##  $ tot.withinss: num 1186
##  $ betweenss   : num 860
##  $ size        : int [1:3] 217 209 257
##  $ iter        : int 3
##  $ ifault      : int 0
##  - attr(*, "class")= chr "kmeans"
# Attach the k = 3 cluster label of each row to the unscaled table.
star_wars_planets_filter[["Cluster"]] <-
  star_wars_planets_filter_norm_kmeans_1[["cluster"]]

head(x = star_wars_planets_filter)
##   Record_High_Temp Record_Low_Temp Revolution Cluster
## 1               36              -5       2112       1
## 2               33             -27       1818       2
## 3               28              -5       1210       1
## 4               38              -6       1431       1
## 5               37             -31       1060       3
## 6               38              -2       1450       1

5.1 Cluster characteristics

5.1.1 Pivot table

# Mean of each clustering variable within every cluster (unscaled values).
# The grouping list is named so the key column comes out as "Cluster".
star_wars_planets_w_cluster_df_1 <- aggregate(
  x = star_wars_planets_filter[
    setdiff(names(star_wars_planets_filter), "Cluster")
  ],
  by = list(Cluster = star_wars_planets_filter$Cluster),
  FUN = mean
)

star_wars_planets_w_cluster_df_1
##   Cluster Record_High_Temp Record_Low_Temp Revolution
## 1       1         27.50691       -6.423963  1511.7880
## 2       2         18.61244      -27.688995  1959.3158
## 3       3         17.34241      -24.077821   676.5759

5.1.2 Pivot chart

library(reshape2)


# Reshape the per-cluster means to long form: one row per cluster/variable
# pair, keeping Cluster as the identifier column.
star_wars_planets_w_cluster_df_1_long <- melt(
  data = star_wars_planets_w_cluster_df_1,
  id.vars = "Cluster"
)

star_wars_planets_w_cluster_df_1_long
##   Cluster         variable       value
## 1       1 Record_High_Temp   27.506912
## 2       2 Record_High_Temp   18.612440
## 3       3 Record_High_Temp   17.342412
## 4       1  Record_Low_Temp   -6.423963
## 5       2  Record_Low_Temp  -27.688995
## 6       3  Record_Low_Temp  -24.077821
## 7       1       Revolution 1511.788018
## 8       2       Revolution 1959.315789
## 9       3       Revolution  676.575875
# Give the melted columns presentation-friendly names for plotting.
colnames(star_wars_planets_w_cluster_df_1_long)[2:3] <- c("Variable", "Mean")

head(x = star_wars_planets_w_cluster_df_1_long)
##   Cluster         Variable       Mean
## 1       1 Record_High_Temp  27.506912
## 2       2 Record_High_Temp  18.612440
## 3       3 Record_High_Temp  17.342412
## 4       1  Record_Low_Temp  -6.423963
## 5       2  Record_Low_Temp -27.688995
## 6       3  Record_Low_Temp -24.077821
# Horizontal bar chart of the per-cluster means, one facet per cluster.
# Cluster is an integer label, so it is wrapped in factor() to get a discrete
# fill palette instead of a continuous colour gradient that would imply the
# clusters are ordered quantities. geom_col() is the direct way to draw bars
# whose heights are already computed.
ggplot(star_wars_planets_w_cluster_df_1_long) +
  aes(x = Variable, y = Mean, fill = factor(Cluster)) +
  geom_col(position = "dodge") +
  coord_flip() +
  facet_wrap(~ Cluster) +
  theme(legend.position = "none") +
  ggtitle("Cluster characteristics")

5.2 Use normalised set for better representation

# Convert the standardised values to a data frame so a label column can be
# added alongside them.
star_wars_planets_filter_norm <- as.data.frame(star_wars_planets_filter_norm)
head(star_wars_planets_filter_norm)
##   Record_High_Temp Record_Low_Temp Revolution
## 1        1.2242449       1.1579100  1.1162594
## 2        0.9800395      -0.5900060  0.6941853
## 3        0.5730305       1.1579100 -0.1786756
## 4        1.3870485       1.0784592  0.1385979
## 5        1.3056467      -0.9078089 -0.3940195
## 6        1.3870485       1.3962621  0.1658748
# Attach the k = 3 cluster label of each row to the standardised table.
star_wars_planets_filter_norm[["Cluster"]] <-
  star_wars_planets_filter_norm_kmeans_1[["cluster"]]

head(x = star_wars_planets_filter_norm)
##   Record_High_Temp Record_Low_Temp Revolution Cluster
## 1        1.2242449       1.1579100  1.1162594       1
## 2        0.9800395      -0.5900060  0.6941853       2
## 3        0.5730305       1.1579100 -0.1786756       1
## 4        1.3870485       1.0784592  0.1385979       1
## 5        1.3056467      -0.9078089 -0.3940195       3
## 6        1.3870485       1.3962621  0.1658748       1
# Mean of each standardised variable within every cluster.
# The grouping list is named so the key column comes out as "Cluster".
star_wars_planets_w_cluster_df_1_norm <- aggregate(
  x = star_wars_planets_filter_norm[
    setdiff(names(star_wars_planets_filter_norm), "Cluster")
  ],
  by = list(Cluster = star_wars_planets_filter_norm$Cluster),
  FUN = mean
)

star_wars_planets_w_cluster_df_1_norm
##   Cluster Record_High_Temp Record_Low_Temp Revolution
## 1       1        0.5328923       1.0447751  0.2545793
## 2       2       -0.1911337      -0.6447471  0.8970619
## 3       3       -0.2945163      -0.3578367 -0.9444733
library(reshape2)

# Reshape the standardised per-cluster means to long form, keeping Cluster
# as the identifier column.
star_wars_planets_w_cluster_df_1_norm_long <- melt(
  data = star_wars_planets_w_cluster_df_1_norm,
  id.vars = "Cluster"
)
star_wars_planets_w_cluster_df_1_norm_long
##   Cluster         variable      value
## 1       1 Record_High_Temp  0.5328923
## 2       2 Record_High_Temp -0.1911337
## 3       3 Record_High_Temp -0.2945163
## 4       1  Record_Low_Temp  1.0447751
## 5       2  Record_Low_Temp -0.6447471
## 6       3  Record_Low_Temp -0.3578367
## 7       1       Revolution  0.2545793
## 8       2       Revolution  0.8970619
## 9       3       Revolution -0.9444733
# Give the melted columns presentation-friendly names for plotting.
colnames(star_wars_planets_w_cluster_df_1_norm_long)[2:3] <-
  c("Variable", "Mean")

head(x = star_wars_planets_w_cluster_df_1_norm_long)
##   Cluster         Variable       Mean
## 1       1 Record_High_Temp  0.5328923
## 2       2 Record_High_Temp -0.1911337
## 3       3 Record_High_Temp -0.2945163
## 4       1  Record_Low_Temp  1.0447751
## 5       2  Record_Low_Temp -0.6447471
## 6       3  Record_Low_Temp -0.3578367
# Horizontal bar chart of the standardised per-cluster means, one facet per
# cluster. Cluster is an integer label, so it is wrapped in factor() to get a
# discrete fill palette rather than a continuous gradient; geom_col() draws
# bars from values that are already computed.
ggplot(star_wars_planets_w_cluster_df_1_norm_long) +
  aes(x = Variable, y = Mean, fill = factor(Cluster)) +
  geom_col(position = "dodge") +
  coord_flip() +
  facet_wrap(~ Cluster) +
  theme(legend.position = "none") +
  ggtitle("Cluster characteristics k = 3")

6. Check clustering validity

library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# fviz_nbclust(x, FUNcluster, method = c("silhouette", "wss", "gap_stat"))
# x: numeric matrix or data frame
# FUNcluster: a partitioning function. Allowed values include kmeans, pam, clara and hcut (for hierarchical clustering).
# method: the method to be used for determining the optimal number of clusters.

# Diagnostics for choosing k must see only the standardised features. By this
# point `star_wars_planets_filter_norm` also carries the k = 3 `Cluster`
# labels; leaving that column in would feed the earlier assignment back into
# the clustering and bias both curves, so it is dropped by name here.

# Elbow method: total within-cluster sum of squares for each candidate k,
# with a dashed reference line at k = 4.
fviz_nbclust(
  star_wars_planets_filter_norm[
    , setdiff(names(star_wars_planets_filter_norm), "Cluster"), drop = FALSE
  ],
  kmeans, method = "wss"
) +
  geom_vline(xintercept = 4, linetype = 2) +
  labs(subtitle = "Elbow method")

# Silhouette method: average silhouette width for each candidate k.
fviz_nbclust(
  star_wars_planets_filter_norm[
    , setdiff(names(star_wars_planets_filter_norm), "Cluster"), drop = FALSE
  ],
  kmeans, method = "silhouette"
) +
  labs(subtitle = "Silhouette method")

7. k-means using k = 4

# Fit k-means with k = 4 on the standardised variables only.
# `star_wars_planets_filter_norm` already holds the k = 3 `Cluster` labels at
# this point; passing the whole frame would treat those labels as a fourth
# feature and let the previous clustering drive this one, so the column is
# excluded by name. The seed is fixed and several random starts are used so
# the assignment is reproducible and not tied to one random initialisation.
set.seed(123)
star_wars_planets_filter_norm_kmeans_2 <- kmeans(
  star_wars_planets_filter_norm[
    , setdiff(names(star_wars_planets_filter_norm), "Cluster"), drop = FALSE
  ],
  centers = 4,
  nstart = 25
)

# Inspect the components of the fitted object (assignments, centres, sizes).
str(star_wars_planets_filter_norm_kmeans_2)
## List of 9
##  $ cluster     : int [1:683] 1 3 1 1 4 1 3 1 4 1 ...
##  $ centers     : num [1:4, 1:4] 0.533 -0.909 -0.241 0.649 1.045 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:4] "1" "2" "3" "4"
##   .. ..$ : chr [1:4] "Record_High_Temp" "Record_Low_Temp" "Revolution" "Cluster"
##  $ totss       : num 2518
##  $ withinss    : num [1:4] 398 176 330 117
##  $ tot.withinss: num 1021
##  $ betweenss   : num 1496
##  $ size        : int [1:4] 217 153 202 111
##  $ iter        : int 4
##  $ ifault      : int 0
##  - attr(*, "class")= chr "kmeans"
# Overwrite the label column on the unscaled table with the k = 4 assignment.
star_wars_planets_filter[["Cluster"]] <-
  star_wars_planets_filter_norm_kmeans_2[["cluster"]]

head(x = star_wars_planets_filter)
##   Record_High_Temp Record_Low_Temp Revolution Cluster
## 1               36              -5       2112       1
## 2               33             -27       1818       3
## 3               28              -5       1210       1
## 4               38              -6       1431       1
## 5               37             -31       1060       4
## 6               38              -2       1450       1

7.1 Cluster characteristics

7.1.1 Pivot table

# Print the mean of each clustering variable per cluster. Column 4 (the
# Cluster label itself) is dropped from the values being averaged; the
# grouping key appears under aggregate()'s default name, Group.1.
aggregate(star_wars_planets_filter[, -4], 
          by = list(star_wars_planets_filter$Cluster),
          FUN = mean)
##   Group.1 Record_High_Temp Record_Low_Temp Revolution
## 1       1         27.50691       -6.423963  1511.7880
## 2       2          9.79085      -21.379085   729.3072
## 3       3         18.00495      -27.529703  1979.2376
## 4       4         28.93694      -28.315315   648.5315

7.1.2 Pivot chart

# Store the per-cluster means (same computation as the pivot table) so they
# can be reshaped for plotting. Column 4 is the Cluster label and is excluded
# from the averaged values.
star_wars_planets_w_cluster_df_2 <- aggregate(star_wars_planets_filter[, -4], 
                                              by = list(star_wars_planets_filter$Cluster),
                                              FUN = mean)

# Show the stored table; the grouping key is still named Group.1 here.
star_wars_planets_w_cluster_df_2
##   Group.1 Record_High_Temp Record_Low_Temp Revolution
## 1       1         27.50691       -6.423963  1511.7880
## 2       2          9.79085      -21.379085   729.3072
## 3       3         18.00495      -27.529703  1979.2376
## 4       4         28.93694      -28.315315   648.5315
# Rename the grouping key from its default name to "Cluster".
colnames(star_wars_planets_w_cluster_df_2)[1L] <- "Cluster"

star_wars_planets_w_cluster_df_2
##   Cluster Record_High_Temp Record_Low_Temp Revolution
## 1       1         27.50691       -6.423963  1511.7880
## 2       2          9.79085      -21.379085   729.3072
## 3       3         18.00495      -27.529703  1979.2376
## 4       4         28.93694      -28.315315   648.5315
library(reshape2)


# Reshape the k = 4 per-cluster means to long form: one row per
# cluster/variable pair, keeping Cluster as the identifier column.
star_wars_planets_w_cluster_df_2_long <- melt(
  data = star_wars_planets_w_cluster_df_2,
  id.vars = "Cluster"
)

star_wars_planets_w_cluster_df_2_long
##    Cluster         variable       value
## 1        1 Record_High_Temp   27.506912
## 2        2 Record_High_Temp    9.790850
## 3        3 Record_High_Temp   18.004950
## 4        4 Record_High_Temp   28.936937
## 5        1  Record_Low_Temp   -6.423963
## 6        2  Record_Low_Temp  -21.379085
## 7        3  Record_Low_Temp  -27.529703
## 8        4  Record_Low_Temp  -28.315315
## 9        1       Revolution 1511.788018
## 10       2       Revolution  729.307190
## 11       3       Revolution 1979.237624
## 12       4       Revolution  648.531532
# Give the melted columns presentation-friendly names for plotting.
colnames(star_wars_planets_w_cluster_df_2_long)[2:3] <- c("Variable", "Mean")

head(x = star_wars_planets_w_cluster_df_2_long)
##   Cluster         Variable       Mean
## 1       1 Record_High_Temp  27.506912
## 2       2 Record_High_Temp   9.790850
## 3       3 Record_High_Temp  18.004950
## 4       4 Record_High_Temp  28.936937
## 5       1  Record_Low_Temp  -6.423963
## 6       2  Record_Low_Temp -21.379085
# Horizontal bar chart of the k = 4 per-cluster means, one facet per cluster.
# Cluster is an integer label, so it is wrapped in factor() to get a discrete
# fill palette rather than a continuous gradient; geom_col() draws bars from
# values that are already computed.
ggplot(star_wars_planets_w_cluster_df_2_long) +
  aes(x = Variable, y = Mean, fill = factor(Cluster)) +
  geom_col(position = "dodge") +
  coord_flip() +
  facet_wrap(~ Cluster) +
  theme(legend.position = "none") +
  ggtitle("Cluster characteristics")

7.2 Use normalised set for better representation

# Overwrite the label column on the standardised table with the k = 4
# assignment.
star_wars_planets_filter_norm[["Cluster"]] <-
  star_wars_planets_filter_norm_kmeans_2[["cluster"]]

head(x = star_wars_planets_filter_norm)
##   Record_High_Temp Record_Low_Temp Revolution Cluster
## 1        1.2242449       1.1579100  1.1162594       1
## 2        0.9800395      -0.5900060  0.6941853       3
## 3        0.5730305       1.1579100 -0.1786756       1
## 4        1.3870485       1.0784592  0.1385979       1
## 5        1.3056467      -0.9078089 -0.3940195       4
## 6        1.3870485       1.3962621  0.1658748       1
# Mean of each standardised variable within every k = 4 cluster.
# The grouping list is named so the key column comes out as "Cluster".
star_wars_planets_w_cluster_df_2_norm <- aggregate(
  x = star_wars_planets_filter_norm[
    setdiff(names(star_wars_planets_filter_norm), "Cluster")
  ],
  by = list(Cluster = star_wars_planets_filter_norm$Cluster),
  FUN = mean
)



# Reshape the standardised k = 4 per-cluster means to long form, keeping
# Cluster as the identifier column.
star_wars_planets_filter_norm_long_2 <- melt(
  data = star_wars_planets_w_cluster_df_2_norm,
  id.vars = "Cluster"
)
head(x = star_wars_planets_filter_norm_long_2)
##   Cluster         variable      value
## 1       1 Record_High_Temp  0.5328923
## 2       2 Record_High_Temp -0.9092271
## 3       3 Record_High_Temp -0.2405845
## 4       4 Record_High_Temp  0.6492989
## 5       1  Record_Low_Temp  1.0447751
## 6       2  Record_Low_Temp -0.1434202
# Give the melted columns presentation-friendly names for plotting.
colnames(star_wars_planets_filter_norm_long_2)[2:3] <- c("Variable", "Mean")

head(x = star_wars_planets_filter_norm_long_2)
##   Cluster         Variable       Mean
## 1       1 Record_High_Temp  0.5328923
## 2       2 Record_High_Temp -0.9092271
## 3       3 Record_High_Temp -0.2405845
## 4       4 Record_High_Temp  0.6492989
## 5       1  Record_Low_Temp  1.0447751
## 6       2  Record_Low_Temp -0.1434202
# Horizontal bar chart of the standardised k = 4 per-cluster means, one facet
# per cluster. Cluster is an integer label, so it is wrapped in factor() to
# get a discrete fill palette rather than a continuous gradient; geom_col()
# draws bars from values that are already computed.
ggplot(star_wars_planets_filter_norm_long_2) +
  aes(x = Variable, y = Mean, fill = factor(Cluster)) +
  geom_col(position = "dodge") +
  coord_flip() +
  facet_wrap(~ Cluster) +
  theme(legend.position = "none") +
  ggtitle("Cluster characteristics, k = 4")

7.3 Scatter plot

# Treat the cluster label as categorical so it can drive point shape and
# colour in the scatter plots.
star_wars_planets_filter[["Cluster"]] <-
  as.factor(star_wars_planets_filter[["Cluster"]])

# Record low temperature against Revolution, marked by cluster.
ggplot(
  data = star_wars_planets_filter,
  mapping = aes(
    x = Revolution, y = Record_Low_Temp,
    shape = Cluster, color = Cluster
  )
) +
  geom_point()

# Record high temperature against Revolution, marked by cluster.
ggplot(
  data = star_wars_planets_filter,
  mapping = aes(
    x = Revolution, y = Record_High_Temp,
    shape = Cluster, color = Cluster
  )
) +
  geom_point()

library(plotly)

# Interactive 3-D scatter of the three clustering variables, coloured by
# cluster label. Plot title, axis titles (under `scene`) and the legend title
# are all set through layout(); `legend` is a top-level layout argument, not
# part of `scene`.
layout(
  plot_ly(
    x = star_wars_planets_filter$Revolution,
    y = star_wars_planets_filter$Record_Low_Temp,
    z = star_wars_planets_filter$Record_High_Temp,
    type = "scatter3d",
    mode = "markers",
    color = star_wars_planets_filter$Cluster
  ),
  title = "Clustered Star Wars Planets",
  scene = list(
    xaxis = list(title = "Revolution"),
    yaxis = list(title = "Record Low Temp"),
    zaxis = list(title = "Record High Temp")
  ),
  legend = list(title = list(text = "<b>Cluster</b>"))
)