TimeEval-algorithms/stomp/algorithm.r at main · TimeEval/TimeEval-algorithms · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#!/usr/bin/env Rscript

library(tsmp)
library(jsonlite)

# Fallback operator: `a %or% b` yields `a` unless `a` counts as "missing",
# in which case `b` is used instead.
#
# A value counts as missing when it is exactly FALSE, NULL, NA, NaN or has
# length zero. When `a` has more than one element the decision is made
# element by element via mapply() (with `b` recycled as mapply() does).
`%or%` <- function(a, b) {
    # Decide for a single value whether to keep it or take the fallback.
    pick <- function(x, fallback) {
        unusable <- identical(x, FALSE) || is.null(x) ||
            is.na(x) || is.nan(x) || length(x) == 0
        if (unusable) fallback else x
    }

    # Vectors are handled element-wise; everything else in one step.
    if (length(a) > 1) {
        return(mapply(pick, a, b))
    }
    pick(a, b)
}

# Read the trailing command line arguments (everything after the script
# name) and abort when none were given.
# commandArgs() always returns a character vector, so an emptiness check is
# sufficient; applying is.na()/is.nan() to it inside `||` would fail with an
# unrelated error as soon as more than one argument is passed.
args <- commandArgs(trailingOnly = TRUE)
if (length(args) == 0) {
    stop("No arguments specified!")
}

# Parse the JSON configuration passed on the command line and extract all
# settings, falling back to defaults (via %or%) for anything not supplied.
config <- fromJSON(args)
input <- config$dataInput %or% "/data/dataset.csv"
output <- config$dataOutput %or% "/results/anomaly_scores.csv"
# ignore modelInput and modelOutput, because they are not needed
executionType <- config$executionType %or% "execute"
window_size <- config$customParameters$anomaly_window_size %or% 30
# NOTE: the default must be a single literal. `%or%` binds tighter than `/`,
# so `x %or% 1/2` is parsed as `(x %or% 1) / 2` and would halve every
# user-supplied exclusion_zone.
exclusion_zone <- config$customParameters$exclusion_zone %or% 0.5
verbose <- config$customParameters$verbose %or% 1
n_jobs <- config$customParameters$n_jobs %or% 1
random_state <- config$customParameters$random_state %or% 42
use_column_index <- config$customParameters$use_column_index %or% 0
# align index to R-indexing, which is 1-based
use_column_index <- use_column_index + 1

# Seed R's random number generator with the configured random_state
set.seed(random_state)

# Validate parameters: window sizes below the minimum are raised to it
# (with a warning on stderr) instead of aborting.
min_window_size <- 4
if (window_size < min_window_size) {
    message("WARN: window_size must be at least 4. Dynamically fixing it by setting window_size to 4")
    window_size <- min_window_size
}

# Dump the effective configuration only at verbosity levels above 1
show_config <- verbose > 1
if (show_config) {
    message("-- Configuration ------------")
    message("executionType=", executionType)
    message("window_Size=", window_size)
    message("exclusion_zone=", exclusion_zone)
    message("verbose=", verbose)
    message("n_jobs=", n_jobs)
    message("-----------------------------")
}

# Only the "execute" step does any work; every other execution type ends
# the script right here.
is_execute <- executionType == "execute"
if (!is_execute) {
    message("Training not required. Finished!")
    quit()
}


# Load the dataset and pick the single column that STOMP will be run on.
message("Reading data from ", input)
data <- read.csv(file = input)

# use_column_index is 1-based at this point. Indices below 1 would, after the
# timestamp offset added further down, select the timestamp column, no column
# at all, or turn into a negative (column-dropping) index. Clamp them to the
# first channel, mirroring the upper-bound handling below.
if (use_column_index < 1) {
    message("Selected column index ",
        use_column_index,
        " is out of bounds (min index = 1)! Using first channel!"
    )
    use_column_index <- 1
}

# Two columns are not selectable channels: the leading index (timestamp)
# column and, presumably, a trailing label column -- TODO confirm against the
# dataset format.
max_column_index <- ncol(data) - 2
if (use_column_index > max_column_index) {
    message("Selected column index ",
        use_column_index,
        " is out of bounds (max index = ",
        max_column_index,
        ")! Using last channel!"
    )
    use_column_index <- max_column_index
}
# jump over index column (timestamp)
use_column_index <- use_column_index + 1
values <- data[, use_column_index] # Attention: 1-based indexing!

# Run STOMP on the selected values: the parallel variant when more than one
# job was requested, the single-process variant otherwise.
if (n_jobs > 1) {
    stomp_mp <- stomp_par(
        values,
        window_size = window_size,
        exclusion_zone = exclusion_zone,
        verbose = verbose,
        n_workers = n_jobs
    )
} else {
    stomp_mp <- stomp(
        values,
        window_size = window_size,
        exclusion_zone = exclusion_zone,
        verbose = verbose
    )
}
# The first column of the `mp` component is what gets written out as scores.
result <- stomp_mp$mp[, 1]

# Write one value per line, without header or row names.
message("Writing results to ", output)
write.table(
    result,
    file = output,
    sep = ",",
    eol = "\n",
    row.names = FALSE,
    col.names = FALSE,
    fileEncoding = "UTF-8"
)