Forgot the normalization; with it added the fit is noticeably more stable and accurate...
By the look of it, the devices fall into roughly two groups.
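For reference, the normalization in the listing below is the per-column z-score that scale() applies; a minimal hand-rolled equivalent (the data frame d and its columns are made up purely for illustration) looks like this:
## illustrative only: what scale() does to each numeric column
d <- data.frame(a = c(1, 2, 3, 10), b = c(5, 5, 6, 8))
d.norm <- as.data.frame(lapply(d, function(x) (x - mean(x)) / sd(x)))
d.norm   # every column now has mean 0 and sd 1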
Code
df.data <- read.csv2("mydat.csv")
str(df.data)
## z-score the numeric (non-factor) columns, shift by +2 and drop the index/id/unused columns;
## the remaining feature columns have to match the input shape of the network below (18)
df.data.norm <- (data.frame(lapply(df.data[, !sapply(df.data, is.factor)], scale)) + 2)[, -c(1, 2, 3, 4, 10, 12, 13)]
library(dplyr)
library(keras)
## split the normalized features by device id and turn each device into a time-step x feature matrix
data.list <- lapply(split(df.data.norm, df.data$id), as.list)
pribor_order <- names(data.list)
names(data.list) <- NULL
data.list <- lapply(data.list, function(l) {names(l) <- NULL; l})
data.list <- lapply(data.list, function(l) lapply(l, as.numeric))
data.list <- lapply(data.list, function(l) as.matrix(data.frame(l)))
## data.list <- lapply(data.list, normalize)
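## regression target: the number of observations recorded for each device, in the same order as data.list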
pribor.out <- sapply(pribor_order, function(n) sum(df.data$id == n))
seqLength <- 50
in_seq_event <- pad_sequences(data.list, truncating = 'post', maxlen = seqLength)
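## in_seq_event: 3D array (devices x seqLength x features); sequences longer than 50 steps are
## truncated from the end ('post'), shorter ones are zero-padded at the start (keras default 'pre')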
## model
drop <- 0.4
K <- backend()
K$clear_session()
main_input <- layer_input(shape = c(seqLength, 18), dtype = 'float32', name = 'main_input')
data_in_norm <- main_input %>%
  layer_batch_normalization()
## bidirectional()
## forward-direction GRU stack
lstm_out.forw <- data_in_norm %>%
  layer_gru(units = 8,
            dropout = 0.2,
            recurrent_dropout = 0.2,
            return_sequences = TRUE) %>%
  layer_batch_normalization() %>%
  layer_gru(units = 8,
            dropout = 0.2,
            recurrent_dropout = 0.2) %>%
  layer_batch_normalization()
## backward-direction GRU stack (same architecture, go_backwards = TRUE)
lstm_out.back <- data_in_norm %>%
  layer_gru(units = 8,
            dropout = 0.2,
            recurrent_dropout = 0.2,
            return_sequences = TRUE,
            go_backwards = TRUE) %>%
  layer_batch_normalization() %>%
  layer_gru(units = 8,
            dropout = 0.2,
            recurrent_dropout = 0.2,
            go_backwards = TRUE) %>%
  layer_batch_normalization()
## merge both directions and regress the target with a small dense head
main_output <- layer_concatenate(c(lstm_out.forw,
                                   lstm_out.back)) %>%
  layer_dense(units = 8, activation = 'relu') %>%
  layer_batch_normalization() %>%
  layer_dropout(drop) %>%
  # layer_dense(units = 8, activation = 'relu') %>%
  # layer_batch_normalization() %>%
  # layer_dropout(drop) %>%
  layer_dense(units = 8, activation = 'relu') %>%
  layer_batch_normalization() %>%
  layer_dropout(drop) %>%
  layer_dense(units = 1, activation = 'linear', name = 'main_output')
model <- keras_model(
  inputs = c(main_input),
  outputs = c(main_output)
)
model %>%
  compile(
    ## optimizer = 'rmsprop',
    ## optimizer = 'nadam',
    optimizer = 'adam',
    loss = 'mean_squared_error'
    ## loss = 'binary_crossentropy'
    ## loss_weights = c(1.0, 0.2)
  )
summary(model)
history <- model %>%
  fit(x = list(in_seq_event),
      y = list(pribor.out),
      ## y = list(labels, labels),
      validation_split = 0.1,
      # class_weight = list("0" = 0.1392319, "1" = 1),
      epochs = 177*2 + 77,
      # callbacks = list(callback_reduce_lr_on_plateau(monitor = "val_loss", factor = 0.1),
      #                  callback_tensorboard("logs/run_a", write_images = T, embeddings_freq = 1)),
      batch_size = 8)
plot(history)
pribor_predict <- model %>%
  predict(x = list(in_seq_event),
          batch_size = 8,
          verbose = 1)
plot(pribor.out, pribor_predict)
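To check the "two groups" impression numerically, one quick sketch (my assumption, not part of the pipeline above) is to cluster the predicted values with k = 2:
## rough check: do the predictions split into two clusters? (assumed k = 2)
cl <- kmeans(as.numeric(pribor_predict), centers = 2)
table(cl$cluster)                                   # sizes of the two groups
plot(pribor.out, pribor_predict, col = cl$cluster)  # colour the scatter by cluster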