python – pytorch LSTM model with unequal hidden layer

I have tuned an LSTM model in Keras as follows, but I don't know how to write the same model in PyTorch. I have put my PyTorch attempt below, but I don't think it is right, because it does not give the right answer. However much I searched, I could not find sample PyTorch code for more than one LSTM layer with unequal hidden sizes. My input shape is (None, 60, 10) and my output shape is (None, 15). Please show an equivalent of my Keras model in PyTorch. Thanks.


# Functional-API model: two stacked LSTMs followed by two Dense layers.
model_input = keras.Input(shape=(60, 10))
# return_sequences=True hands the full per-time-step sequence to the next LSTM.
sequence_features = layers.LSTM(160, return_sequences=True)(model_input)
# The second LSTM is built without return_sequences, so only its final
# output is passed on to the Dense layers.
summary_vector = layers.LSTM(190)(sequence_features)
dense_features = layers.Dense(200)(summary_vector)
x_1 = layers.Dense(15)(dense_features)
model = keras.models.Model(model_input, x_1)


# Hyper-parameters for the PyTorch model; the values match the Keras model above.
input_dim = 10  # features per time step, as in keras.Input(shape=(60, 10))
hidden_dim_1 = 160  # hidden size of the first LSTM
hidden_dim_2 = 190  # hidden size of the second LSTM
hidden_dim_3 = 200  # width of the first fully connected layer
num_layers = 1  # stacked layers inside each nn.LSTM module
output_dim = 15  # size of the final output vector

class LSTM(nn.Module):
    """Two stacked LSTMs with different hidden sizes followed by two linear layers.

    Layer layout (same as the Keras stack
    ``LSTM(h1, return_sequences=True) -> LSTM(h2) -> Dense(h3) -> Dense(out)``):
    the first LSTM passes its whole output sequence to the second LSTM, and
    only the last time step of the second LSTM feeds the linear layers.

    Args:
        input_dim: Number of features per time step.
        hidden_dim_1: Hidden size of the first LSTM.
        hidden_dim_2: Hidden size of the second LSTM.
        hidden_dim_3: Width of the first linear layer.
        num_layers: Number of stacked layers inside *each* ``nn.LSTM`` module.
        output_dim: Size of the final output vector.
    """

    def __init__(self, input_dim, hidden_dim_1, hidden_dim_2, hidden_dim_3, num_layers, output_dim):
        super(LSTM, self).__init__()
        self.hidden_dim_1 = hidden_dim_1
        self.hidden_dim_2 = hidden_dim_2
        self.hidden_dim_3 = hidden_dim_3
        self.num_layers = num_layers
        self.lstm_1 = nn.LSTM(input_dim, hidden_dim_1, num_layers, batch_first=True)
        self.lstm_2 = nn.LSTM(hidden_dim_1, hidden_dim_2, num_layers, batch_first=True)
        self.fc_1 = nn.Linear(hidden_dim_2, hidden_dim_3)
        self.fc_out = nn.Linear(hidden_dim_3, output_dim)

    def forward(self, x):
        """Map a batch of sequences to one output vector per sequence.

        Args:
            x: Tensor of shape ``(batch, seq_len, input_dim)``.

        Returns:
            Tensor of shape ``(batch, output_dim)``.
        """
        # With no initial state given, nn.LSTM starts every sequence in the
        # batch from zero hidden/cell state. The whole batch is processed in
        # one call, so no state leaks from one sample into the next.
        out_lstm_1, _ = self.lstm_1(x)
        out_lstm_2, _ = self.lstm_2(out_lstm_1)
        # Keep only the last time step of the second LSTM.
        last_step = out_lstm_2[:, -1, :]
        return self.fc_out(self.fc_1(last_step))

python – LSTM Encoder Decoder Network reuse the last state or the last output

I have a question regarding encoder-decoder LSTM Networks.
While looking for guides on how to implement them I came across two different approaches. One of them uses the last output of the encoder network as the encoding of the input, the other one uses the state of the encoder LSTM after the last output has been generated e.g h and c.

The first one boils down to this implementation:

# Approach 1: stack two LSTM layers in a Sequential model.
# The "..." arguments are placeholders left unspecified by the author.
# NOTE(review): the surrounding text refers to RepeatVector(steps), which does
# not appear in this snippet - a layer may be missing here; confirm against link1.
model = Sequential()
model.add(LSTM(..., input_shape=(...)))
model.add(LSTM(..., return_sequences=True))

The second is a little more complex, and can be implemented like this: (source link2)

 # define inference encoder: maps the encoder inputs to the encoder states
 encoder_model = Model(encoder_inputs, encoder_states)
 # define inference decoder: the h and c states are fed in as explicit inputs
 decoder_state_input_h = Input(shape=(n_units,))
 decoder_state_input_c = Input(shape=(n_units,))
 # Lists (not tuples) so they concatenate with [decoder_inputs] / [decoder_outputs] below.
 decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
 decoder_outputs, state_h, state_c = decoder_lstm(decoder_inputs, initial_state=decoder_states_inputs)
 decoder_states = [state_h, state_c]
 decoder_outputs = decoder_dense(decoder_outputs)
 # The decoder model returns the new states next to the outputs, so the
 # caller can pass them back in on the next call.
 decoder_model = Model([decoder_inputs] + decoder_states_inputs, [decoder_outputs] + decoder_states)

So my question is:
what are the main differences between both approaches
and when should you use which of them?

From my understanding, the first one is easier to implement but less flexible, since we need to define the number of output steps in RepeatVector(steps). While the second one is more complicated but more flexible.

Here are the two guides that both implement an LSTM Encoder-Decoder model, but each in a different manner:

  • Reusing the last output link1
  • Reusing the last state link2

python – Feeding different sequences (each of different length and normalization) to LSTM

I am currently feeding a 2 dimensional dataframe to an lstm read in from data.csv. The dataframe contains multiple time series, that are normalized with a minmaxscaler.

However, I would like to train from multiple files. Each of different length, with a different normalization scale, and different files do not sequentially follow each other.

What is the best way to go about this? Do I create a variable length input batch for each file? Or is there some easier way to deal with this?

import torch.nn as nn
from torch.autograd import Variable

# Load the data set; the first CSV column is used as the index.
f = 'data.csv'
df = pd.read_csv(f, sep=",", index_col=0)
# Columns 10 and 11 are the targets.
y = df.iloc[:, 10:12]

y = y.astype(int)
# Columns 0-5 are the input features.
X = df.iloc[:, np.r_[0:6]]

X_old = X
y_old = y

from sklearn.preprocessing import StandardScaler, MinMaxScaler
mm = MinMaxScaler()
ss = StandardScaler()

X_ss = ss.fit_transform(X)
y_mm = mm.fit_transform(y)

# The scaled targets are discarded on purpose: the raw integer labels are
# used from here on (mm stays fitted as a side effect).
y_mm = y.to_numpy() #added

# first 1800 rows for training, the remainder for testing
X_train = X_ss[:1800, :]
X_test = X_ss[1800:, :]

y_train = y_mm[:1800, :]
y_test = y_mm[1800:, :]

print("Training Shape", X_train.shape, y_train.shape)
print("Testing Shape", X_test.shape, y_test.shape)  

X_train_tensors = Variable(torch.Tensor(X_train))
X_test_tensors = Variable(torch.Tensor(X_test))

y_train_tensors = Variable(torch.Tensor(y_train))
y_test_tensors = Variable(torch.Tensor(y_test))

a = X_train_tensors.shape[0]

# reshaping to (rows, timestamps, features); every row becomes a sequence of length 1
X_train_tensors_final = torch.reshape(X_train_tensors,   (X_train_tensors.shape[0], 1, X_train_tensors.shape[1]))
X_test_tensors_final = torch.reshape(X_test_tensors,  (X_test_tensors.shape[0], 1, X_test_tensors.shape[1]))

print("Training Shape", X_train_tensors_final.shape, y_train_tensors.shape)
print("Testing Shape", X_test_tensors_final.shape, y_test_tensors.shape)

class LSTM1(nn.Module):
    """LSTM followed by a small fully connected head with sigmoid outputs.

    Pipeline: ``LSTM -> ReLU -> Linear(hidden_size, 128) -> ReLU ->
    Linear(128, num_classes) -> Sigmoid``.

    Args:
        num_classes: Number of output units.
        input_size: Number of features per time step.
        hidden_size: Hidden size of the LSTM.
        num_layers: Number of stacked LSTM layers.
        seq_length: Sequence length; stored on the instance but not used by
            ``forward``.
    """

    def __init__(self, num_classes, input_size, hidden_size, num_layers, seq_length):
        super(LSTM1, self).__init__()
        self.num_classes = num_classes  # number of classes
        self.num_layers = num_layers  # number of layers
        self.input_size = input_size  # input size
        self.hidden_size = hidden_size  # hidden state
        self.seq_length = seq_length  # sequence length

        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                            num_layers=num_layers, batch_first=True)  # lstm
        self.fc_1 = nn.Linear(hidden_size, 128)  # fully connected 1
        self.fc = nn.Linear(128, num_classes)  # fully connected last layer

        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return sigmoid scores of shape ``(batch, num_classes)``.

        Args:
            x: Tensor of shape ``(batch, seq_len, input_size)``.
        """
        # Without an explicit initial state nn.LSTM starts from zeros that
        # match x's dtype and device, so the model also works off-CPU.
        _, (hn, _) = self.lstm(x)

        # hn is (num_layers, batch, hidden_size). Only the top layer feeds the
        # head; flattening all layers would give num_layers * batch rows and
        # no longer line up with the targets when num_layers > 1.
        top_hidden = hn[-1]

        out = self.relu(top_hidden)
        out = self.relu(self.fc_1(out))  # first Dense + relu
        return self.sigmoid(self.fc(out))  # final output squashed into (0, 1)

# NOTE(review): num_classes, input_size, hidden_size, num_layers, learning_rate
# and num_epochs are not defined in this excerpt; they must be set beforehand.
lstm1 = LSTM1(num_classes, input_size, hidden_size, num_layers, X_train_tensors_final.shape[1]) #our lstm class

criterion = torch.nn.BCELoss()  # binary cross-entropy on the sigmoid outputs
optimizer = torch.optim.Adam(lstm1.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
    optimizer.zero_grad()  # clear gradients left over from the previous step

    # Call the module itself rather than .forward() so registered hooks run.
    outputs = lstm1(X_train_tensors_final)  # forward pass

    # compute the loss
    loss = criterion(outputs, y_train_tensors)

    loss.backward()  # back-propagate: compute gradients of the loss

    optimizer.step()  # update the parameters from the gradients
    if epoch % 100 == 0:
        print("Epoch: %d, loss: %1.5f" % (epoch, loss.item()))

classification – LSTM : What should I do if I am always getting an output too close to one value?

The usual starting point is that if the score is above 0.5, classify it as ham, otherwise as spam. If most emails are ham, then it makes sense that most emails give you a score above 0.5, so you have not said anything that indicates there is a problem.

This approach assumes that the proportion of ham vs spam in the training set is the same as the proportion at test time.

If that doesn’t work, one standard approach is to choose a threshold, and everything with a score above the threshold is treated as ham, everything below as spam. A standard way to set a threshold is, after you’ve trained the LSTM, choose the optimal threshold based on the training set (i.e., that maximizes the accuracy on the training set, etc.), or on a validation set.

machine learning – Designing a neural network with LSTM and feedforward NN combination

Currently, I’m designing a neural network that works with reinforcement learning. In summary, the agent takes in information about itself and nearby agents and, in conjunction with global world information, makes a decision.

I’m currently thinking of implementing this as a LSTM to take in information about itself and a variable number of nearby agents and a feedforward neural network to combine the information from the LSTM output and global world information to produce an action.

Would this approach be sufficient to produce meaningful results? I thought that another approach would be to take in the global world information and each agent at each LSTM cell, though it may use much more resources (resources during forward propagation are a main concern with this project). Also, if the second approach is used, how would I be able to link the inputs to outputs if they had different shapes (attempting to learn without a library)? How would I be able to map an input with shape (1, x, 6) to (1, 1, 4) or (1, 4).

machine learning – Modifying the code for reading .ogg datasets and apply LSTM

Deep learning/LSTM/Matlab

There is a Matlab script that performs the following steps for deep learning with an LSTM. I need to change the first three steps so that the model is trained on our own dataset; the other steps do not need to change.

I need to apply this to .ogg audio files, so please create and use some audio files in .ogg format as sample data and give me the code.

The following steps are for your information:

Three classes of audio signals are generated and labeled as ‘white’, ‘brown’, and ‘pink’. Each class has 1000 samples.

800 samples from each class are used as the training samples to train the deep neural network, so total 800*3=2400 samples in the training dataset. Their labels are their class names ‘white’, ‘brown’, and ‘pink’. (Lines 29 and 30)

200 samples from each class are used as the validation samples to test the performance of deep neural network, so total 600 samples in the validation dataset. Their labels are their class names ‘white’, ‘brown’, and ‘pink’ (Lines 32 and 33)

Extract features from the training dataset and validation dataset.

  • define the structure of the neural network model (LSTM)
  • set training options
  • train the model iteratively using the training dataset and test the model using the validation dataset every iteration.
  • finish training and get the trained model.
  • generate test dataset and use the trained model to classify the test dataset into three classes, ‘white’, ‘brown’, and ‘pink’


% NOTE(review): this listing is an incomplete excerpt. Some statements are
% truncated and the order does not match the data flow (for example,
% featuresTrain is extracted before aFE and audioTrain are defined). Array
% brackets have been restored; the missing parts have NOT been reconstructed.
fs = 44.1e3;
duration = 0.5;
N = duration*fs;
% 1000 white-noise signals, one per column, scaled to [-1, 1]
wNoise = 2*rand([N,1000]) - 1;
wLabels = repelem(categorical("white"),1000,1);
% brown noise: white noise passed through a filter, then normalised
bNoise = filter(1,[1,-0.999],wNoise);
bNoise = bNoise./max(abs(bNoise),[],'all');
bLabels = repelem(categorical("brown"),1000,1);
pNoise = pinknoise([N,1000]);
pLabels = repelem(categorical("pink"),1000,1)
title('White Noise')
title('Brown Noise')
title('Pink Noise')
featuresTrain = extract(aFE,audioTrain);
[numHopsPerSequence,numFeatures,numSignals] = size(featuresTrain)
% first 800 signals of each class for training, the rest for validation
audioTrain = [wNoise(:,1:800),bNoise(:,1:800),pNoise(:,1:800)];
labelsTrain = [wLabels(1:800);bLabels(1:800);pLabels(1:800)];
audioValidation = [wNoise(:,801:end),bNoise(:,801:end),pNoise(:,801:end)];
labelsValidation = [wLabels(801:end);bLabels(801:end);pLabels(801:end)];
% NOTE(review): the audioFeatureExtractor call below is truncated in this
% excerpt (remaining arguments and the closing parenthesis are missing).
aFE = audioFeatureExtractor("SampleRate",fs, ...
"SpectralDescriptorInput","melSpectrum", ...
"spectralCentroid",true, ...
featuresTrain = permute(featuresTrain,[2,1,3]);
featuresTrain = squeeze(num2cell(featuresTrain,[1,2]));
numSignals = numel(featuresTrain)
[numFeatures,numHopsPerSequence] = size(featuresTrain{1})
featuresValidation = extract(aFE,audioValidation);
featuresValidation = permute(featuresValidation,[2,1,3]);
featuresValidation = squeeze(num2cell(featuresValidation,[1,2]));
% NOTE(review): the layer array and the trainingOptions call below are
% truncated in this excerpt (layer list and closing bracket/parenthesis missing).
layers = [ ...
options = trainingOptions("adam", ...
"Shuffle","every-epoch", ...
"ValidationData",{featuresValidation,labelsValidation}, ...
"Plots","training-progress", ...
net = trainNetwork(featuresTrain,labelsTrain,layers,options);
% one fresh test signal per noise colour
wNoiseTest = 2*rand([N,1]) - 1;
bNoiseTest = filter(1,[1,-0.999],wNoiseTest);
bNoiseTest= bNoiseTest./max(abs(bNoiseTest),[],'all');
pNoiseTest = pinknoise(N);

python – LSTM Model – Validation Accuracy is not changing

I am working on a classification problem. My input data is labels, and the expected output data is also labels.
I have made X, Y pairs by shifting X, and Y is converted to categorical values.

    X   Y
    2   1.0
    1   2.0
    1   1.0
    2   1.0
    2   2.0
# Column 1 of `values` holds the target labels: integer-encode, then one-hot (3 classes).
encoder = LabelEncoder()
# NOTE: despite its name, test_labels holds the labels for ALL rows; the
# train/test split happens on the next line.
test_labels = to_categorical(encoder.fit_transform(values[:,1]),num_classes=3)
# Column 0 is the input; 30% of the rows are held out for testing.
train_X,test_X,train_y,test_y= train_test_split(values[:,0], test_labels,test_size = 0.30,random_state = 42)


(154076, 3)
(66033, 3)
Converting this to LSTM format

# Reshape to (samples, timesteps=1, features=1) as expected by the LSTM layer.
train_X = train_X.reshape(train_X.shape[0],1,1)
test_X = test_X.reshape(test_X.shape[0],1,1)

# configure network
n_batch = 1
n_epoch = 10
n_neurons = 100



model = tf.keras.models.Sequential([
    tf.keras.layers.LSTM(n_neurons, batch_input_shape=(n_batch, train_X.shape[1],train_X.shape[2]), stateful=True),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(100, activation = 'relu',kernel_regularizer=regularizers.l2(0.0001)),
    tf.keras.layers.Dense(3, activation='softmax')
])

# NOTE(review): no model.compile(...) call is visible in this excerpt; the
# model must be compiled (loss, optimizer, metrics) before fit() - confirm
# against the original code.
history = model.fit(train_X,train_y,validation_data=(test_X, test_y),epochs=n_epoch, batch_size=n_batch, verbose=1,shuffle= False)

Validation Accuracy is not Changing

Epoch 1/5
154076/154076 (==============================) - 356s 2ms/step - loss: 1.0844 - acc: 0.4269 - val_loss: 1.0814 - val_acc: 0.4310
Epoch 2/5
154076/154076 (==============================) - 354s 2ms/step - loss: 1.0853 - acc: 0.4256 - val_loss: 1.0813 - val_acc: 0.4310
Epoch 3/5
154076/154076 (==============================) - 355s 2ms/step - loss: 1.0861 - acc: 0.4246 - val_loss: 1.0814 - val_acc: 0.4310
Epoch 4/5
154076/154076 (==============================) - 356s 2ms/step - loss: 1.0874 - acc: 0.4228 - val_loss: 1.0825 - val_acc: 0.4310
Epoch 5/5
154076/154076 (==============================) - 353s 2ms/step - loss: 1.0887 - acc: 0.4208 - val_loss: 1.0828 - val_acc: 0.4310

What can be the changes to improve the model.

python – Keras LSTM index order (ascending or descending)

When training an LSTM, should the datetime index be ascending or descending? What I mean is: should the head of the dataset be 2014 and the tail 2020, or the other way around? I'm asking because of the LSTM lookback period; I'm afraid that if the data is not sorted correctly, the model will look into the wrong timeframe.

My current timeframe index when doing print(df) looks like this:

2014-21-3 XYZ
2014-22-3 XYZ
2014-23-3 XYZ

Should it be changed to this order instead for the LSTM to “work”?

2014-23-3 XYZ
2014-22-3 XYZ
2014-21-3 XYZ

python – LSTM Neural Network

I have tried to build a neural network comprised of ten neurons, but I don’t know if my code in Python is any good. I applied the equations for an LSTM neuron in the code, for it to work, and then I made a network of neurons. Would this work or is it just a failed attempt?

h = hidden layer

num_hl = number of hidden layers

Could you please give some feedback on my code and tell me what I can make better etc. Thank you! Here is my code:

import math

num_hl = 10


def _sigmoid(value):
    """Return the logistic sigmoid ``1 / (1 + e**-value)``."""
    return 1/(1 + math.exp(-value))


# Every list starts as [0, 1, ..., num_hl - 1]. The loops below only touch
# indices 1 .. num_hl - 2, so the first and last entries keep these values.
inp = list(range(num_hl))

h = list(range(num_hl))

# --- forget gate: h[1] scales both the previous hidden value and the input ---
fg_p1 = list(range(num_hl))
fg_p2 = list(range(num_hl))

fg_p3 = list(range(num_hl))
forget_gate = list(range(num_hl))

for i in range(1, (num_hl - 1)):

    fg_p1[i] = h[1] * h[i - 1]
    fg_p2[i] = h[1] * inp[i]

    # len(h) is added as a constant offset
    fg_p3[i] = fg_p1[i] + fg_p2[i] + len(h)

    forget_gate[i] = _sigmoid(fg_p3[i])

# --- input gate: same form, scaled by h[2] ---
inp_p1 = list(range(num_hl))
inp_p2 = list(range(num_hl))

inp_p3 = list(range(num_hl))
input_gate = list(range(num_hl))

for i in range(1, (num_hl - 1)):

    inp_p1[i] = h[2] * h[i - 1]
    inp_p2[i] = h[2] * inp[i]

    inp_p3[i] = inp_p1[i] + inp_p2[i] + len(h)

    input_gate[i] = _sigmoid(inp_p3[i])

# --- candidate activation: scaled by h[3], squashed with tanh ---
act_vec_p1 = list(range(num_hl))
act_vec_p2 = list(range(num_hl))

act_vec_p3 = list(range(num_hl))
activation_vector = list(range(num_hl))

for i in range(1, (num_hl - 1)):

    act_vec_p1[i] = h[3] * h[i - 1]
    act_vec_p2[i] = h[3] * inp[i]

    act_vec_p3[i] = act_vec_p1[i] + act_vec_p2[i] + len(h)

    activation_vector[i] = math.tanh(act_vec_p3[i])

# --- cell state: forget part of the previous state, add the gated candidate ---
state_vector = list(range(num_hl))

for i in range(1, (num_hl - 1)):

    state_vector[i] = forget_gate[i] * state_vector[i - 1] + input_gate[i] * activation_vector[i]

# --- output gate ---
# NOTE(review): this reuses h[3], the same factor as the candidate activation
# above; the other gates each use a different one - confirm this is intended.
out_p1 = list(range(num_hl))
out_p2 = list(range(num_hl))

out_p3 = list(range(num_hl))
out_gate = list(range(num_hl))

for i in range(1, (num_hl - 1)):

    out_p1[i] = h[3] * h[i - 1]
    out_p2[i] = h[3] * inp[i]

    out_p3[i] = out_p1[i] + out_p2[i] + len(h)

    out_gate[i] = _sigmoid(out_p3[i])

# --- new hidden values; h is only overwritten here, after all gates are done ---
for i in range(1, (num_hl - 1)):

    h[i] = out_gate[i] * math.tanh(state_vector[i])