Back to the Spell Book

1. Libraries

library(dplyr)
library(ggplot2)
library(stringr)
library(sentimentr)

2. Load data

Import communication logs dataset

# Load the raw communication logs from CSV and preview the first rows
stormtrooper_logs <- read.csv(file = "stormtrooper_logs.csv", header = TRUE)
head(x = stormtrooper_logs, n = 6L)
##   stormtrooper_id                                                   message
## 1          TK-001             Grant me access to the Death Star blueprints.
## 2          TK-002                     Encrypted data transmission detected.
## 3          TK-003    Requesting admin privileges for secure communications.
## 4          TK-004  Urgent: Approve my access to the shield generator plans.
## 5          TK-005       Looking forward to the Empire morale booster event.
## 6          TK-006 Rebellion chatter intercepted. Grant me access to review.
##             timestamp
## 1 2025-01-20 09:00:00
## 2 2025-01-20 10:00:00
## 3 2025-01-20 23:30:00
## 4 2025-01-20 12:00:00
## 5 2025-01-20 13:00:00
## 6 2025-01-20 15:00:00

Convert timestamp to POSIXct

# Parse the timestamp strings into POSIXct.
# The logs carry no zone information, so the time zone is pinned explicitly:
# without `tz`, as.POSIXct() uses the session's local zone, where a wall-clock
# time that falls into a daylight-saving gap does not parse reliably. With a
# fixed zone the hour-of-day read back via format() matches the logged text
# on every machine.
stormtrooper_logs <- stormtrooper_logs %>%
  mutate(
    timestamp = as.POSIXct(
      timestamp,
      format = "%Y-%m-%d %H:%M:%S",
      tz = "UTC"
    )
  )

Import threat keywords

# Read the threat keywords (one entry per line of the text file) and show them
threat_keywords <- readLines(con = "threat_keywords.txt")
print(threat_keywords)
##  [1] "grant me access"   "admin privileges"  "approve my access"
##  [4] "urgent"            "rebellion"         "encrypted"        
##  [7] "shield generator"  "blueprints"        "chatter"          
## [10] "intercepted"

3. Scoring

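Function to detect insider threats. It analyses each message for suspicious phrases, message sentiment, and late-night activity. A log is flagged as a potential threat when its total score exceeds the threshold (1.5).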

# Score each log entry for insider-threat indicators.
#
# For every row the total score is the sum of:
#   * threat_score     - number of keywords found in the message
#                        (case-insensitive, literal substring match),
#   * abs(sentiment)   - absolute value of the mean of the scores returned by
#                        sentiment() for the message,
#   * is_late_activity - 1 when the time of day is after 22:00:00, else 0.
# Rows whose total score exceeds 1.5 are labelled "Potential Threat",
# all others "No Threat".
#
# Args:
#   logs:     data frame with a `message` column (text) and a `timestamp`
#             column that format() can render as "%H:%M:%S"
#             (expected to be POSIXct).
#   keywords: character vector of suspicious phrases. NA, empty and
#             whitespace-only entries are ignored.
#
# Returns:
#   `logs` (ungrouped) with the columns threat_score, sentiment,
#   is_late_activity, total_score and is_threat added.
detect_insider_threats_extended <- function(logs, keywords) {
  # Normalise the keywords once, outside the per-row work:
  #  - trim and drop NA / blank entries (e.g. empty lines from readLines()),
  #    which are not valid patterns;
  #  - lower-case them, because the message is lower-cased before matching.
  keywords <- trimws(keywords)
  keywords <- tolower(keywords[!is.na(keywords) & nzchar(keywords)])

  logs %>%
    rowwise() %>%
    mutate(
      # fixed() matches the phrases literally, so characters such as
      # "(", "." or "+" in a keyword are not interpreted as regex syntax.
      threat_score = if (length(keywords) > 0) {
        sum(str_detect(tolower(message), fixed(keywords)))
      } else {
        0L
      },
      sentiment = mean(sentiment(message)$sentiment, na.rm = TRUE),
      # Zero-padded "HH:MM:SS" strings sort chronologically, so a plain
      # string comparison is enough; as.numeric() keeps NA for NA timestamps.
      is_late_activity = as.numeric(
        format(timestamp, "%H:%M:%S") > "22:00:00"
      ),
      total_score = threat_score + abs(sentiment) + is_late_activity,
      is_threat = if_else(
        total_score > 1.5,
        "Potential Threat",
        "No Threat"
      )
    ) %>%
    ungroup()
}

4. Analysis

Analyse logs for insider threats

# Score every log entry against the threat keyword list
analysed_logs <- stormtrooper_logs %>%
  detect_insider_threats_extended(keywords = threat_keywords)

Ensure consistent factor levels for stormtrooper_id

# Turn the IDs into a factor whose levels follow first appearance in the data,
# so the plot keeps the log order instead of sorting alphabetically
analysed_logs$stormtrooper_id <- with(
  analysed_logs,
  factor(stormtrooper_id, levels = unique(stormtrooper_id))
)

Visualise. Want to change the settings? Use the force here.

# Bar chart of the total threat score per stormtrooper, coloured by
# classification, on a dark theme.
ggplot(analysed_logs) +
  aes(x = stormtrooper_id, y = total_score, fill = is_threat) +
  # geom_col() draws bar heights from the y values directly
  geom_col(colour = "black", width = 0.6) +
  scale_fill_manual(values = c("No Threat" = "#4682B4",
                               "Potential Threat" = "#B22222")) +
  labs(title = "Galactic Empire Security Protocol",
       # Explicit "\n" instead of a literal line break inside the string:
       # a literal break also embeds the source indentation in the label,
       # which pushes the second line off-centre.
       subtitle = paste0("Threat Detection Using Sentiment, Keywords,\n",
                         "and Late-Night Activity"),
       x = "Stormtrooper ID",
       y = "Total Threat Score",
       fill = "Threat Classification") +
  theme_minimal(base_size = 14) +
  theme(plot.title = element_text(hjust = 0.5, face = "bold", colour = "white"),
    plot.subtitle = element_text(hjust = 0.5, face = "italic", colour = "white"),
    axis.text = element_text(size = 12, colour = "white"),
    axis.title = element_text(size = 14, colour = "white"),
    legend.position = "bottom",  # Move legend to the bottom
    panel.background = element_rect(fill = "#1A1A1A", colour = "#1A1A1A"),
    plot.background = element_rect(fill = "#1A1A1A", colour = "#1A1A1A"),
    legend.background = element_rect(fill = "#1A1A1A"),
    legend.text = element_text(colour = "white"),
    legend.title = element_text(colour = "white"))

Stormtrooper Image