conv_visualize.py
import argparse
import json

import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import pretty_midi as pm
import tensorflow as tf

from src.chroma_rolls_preprocessor import get_chroma_from_midi, get_midi_from_chroma

INFERENCE_DIR = 'inference/'
ORIGINAL = '-original.mid'
RECONSTRUCTED = '-model-reconstructed.mid'


def get_latent_encoding(model, chroma) -> tf.Tensor:
    """
    Gets the latent encoding for the given chroma.

    Inputs:
    - model: a trained tf.keras.Model instance
    - chroma: numpy array holding the chroma representation of the chosen midi

    Returns: a tensor that is the latent encoding of the chroma produced by the model
    """
    chroma = np.expand_dims(chroma, 0).astype(np.int32)  # add a batch dimension
    chroma = tf.transpose(chroma, perm=[0, 2, 3, 1])  # channels-first -> channels-last
    latent_encoding = model(chroma)[-1]  # the model's last output is z
    return latent_encoding
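
# Usage sketch (hypothetical file path; assumes `model` is a loaded VAE whose
# call returns z as its last output, as above):
#
#   chroma = get_chroma_from_midi('data/major.mid')
#   z = get_latent_encoding(model, chroma)  # likely shape: (1, latent_size)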


def interpolate_by_average(model, file0, file1, weight):
    """
    Generates an interpolation between two midi files by taking a weighted
    average of their latent encodings.

    Inputs:
    - model: a trained tf.keras.Model instance
    - file0, file1: paths to the midi files at the start and end of the interpolation
    - weight: weight given to file0's encoding; file1's encoding gets (1 - weight)

    Returns: a numpy array of the same shape as a single model output
    """
    name0 = file0.split('/')[-1].split('.')[0]
    name1 = file1.split('/')[-1].split('.')[0]
    name = name0 + name1 + '.mid'
    chroma0 = get_chroma_from_midi(file0)
    chroma1 = get_chroma_from_midi(file1)
    z0 = get_latent_encoding(model, chroma0)
    z1 = get_latent_encoding(model, chroma1)
    z = weight * z0 + (1 - weight) * z1
    x = model.decoder(z)
    x = tf.transpose(x, perm=[0, 3, 1, 2])  # channels-last -> channels-first
    x = tf.squeeze(x).numpy()
    chroma_to_file(x, INFERENCE_DIR + 'average-' + name)
    return x
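
# Usage sketch (hypothetical paths): weight=0.5 decodes the midpoint of the
# two encodings and writes the result under inference/ as
# 'average-<name0><name1>.mid' along with its piano-roll plot.
#
#   blended = interpolate_by_average(model, 'data/major.mid', 'data/minor.mid', 0.5)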


def interpolate_by_steps(model, file0, file1, steps):
    """
    Generates an interpolation between two midi files by decoding evenly
    spaced points on the line between their latent encodings.

    Inputs:
    - model: a trained tf.keras.Model instance
    - file0, file1: paths to the midi files at the start and end of the interpolation
    - steps: number of steps between (and inclusive of) the start and end

    Returns: a tensor of the same shape as the model output, with steps as the batch size
    """
    name0 = file0.split('/')[-1].split('.')[0]
    name1 = file1.split('/')[-1].split('.')[0]
    name = name0 + name1 + '.mid'
    chroma0 = get_chroma_from_midi(file0)
    chroma1 = get_chroma_from_midi(file1)
    z0 = get_latent_encoding(model, chroma0)
    z1 = get_latent_encoding(model, chroma1)
    w = tf.linspace(0.0, 1.0, steps)
    w = tf.cast(tf.reshape(w, (steps, 1, 1)), dtype=tf.float32)
    z = tf.transpose((1 - w) * z0 + w * z1, perm=[1, 0, 2])  # start at z0, end at z1
    z = tf.squeeze(z, 0)
    x = model.decoder(z)
    x = tf.transpose(x, perm=[0, 3, 1, 2])  # channels-last -> channels-first
    for i in range(steps):
        chroma_to_file(x[i].numpy(), INFERENCE_DIR + 'step' + str(i) + name)
    return x
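
# Usage sketch (hypothetical paths): five steps write
# inference/step0<name>.mid through inference/step4<name>.mid, morphing from
# the first file to the second.
#
#   frames = interpolate_by_steps(model, 'data/major.mid', 'data/minor.mid', 5)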


def chroma_to_file(chroma, file_path):
    """
    Converts a chroma matrix to midi, saves a piano-roll plot of it, and
    writes the midi to a file.

    Inputs:
    - chroma: a numpy matrix holding the chroma representation of a midi
    - file_path: path to write the midi to
    """
    midi = get_midi_from_chroma(chroma, tempo=120)
    plt.figure(figsize=(8, 4))
    plot_piano_roll(midi, 42, 90)  # notes should be in 48 to 84
    # plt.show()
    basename = file_path.split('/')[-1].split('.')[0]
    plt.savefig(fname=INFERENCE_DIR + basename)
    midi.write(file_path)


def predict_and_write_midi(model, midi_file):
    """
    Runs model inference on a midi file and writes both the original and the
    reconstructed midi.

    Inputs:
    - model: a trained tf.keras.Model instance
    - midi_file: a string that is the path to the midi file of choice
    """
    name = midi_file.split('/')[-1].split('.')[0]
    chroma = get_chroma_from_midi(midi_file)
    chroma_to_file(chroma, INFERENCE_DIR + name + ORIGINAL)
    chroma_batch = np.expand_dims(chroma, 0).astype(np.int32)
    chroma_batch = tf.transpose(chroma_batch, perm=[0, 2, 3, 1])
    pred_chroma = model(chroma_batch)[0]  # the model's first output is the reconstruction
    pred_chroma = tf.transpose(pred_chroma, perm=[0, 3, 1, 2])
    pred_chroma = tf.squeeze(pred_chroma, axis=0).numpy()
    chroma_to_file(pred_chroma, INFERENCE_DIR + name + RECONSTRUCTED)
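
# Usage sketch (hypothetical path): this writes inference/major-original.mid
# and inference/major-model-reconstructed.mid plus their piano-roll plots.
#
#   predict_and_write_midi(model, 'data/major.mid')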


# From https://github.com/craffel/pretty-midi/blob/main/Tutorial.ipynb
def plot_piano_roll(midi, start_pitch, end_pitch, fs=100):
    """Displays the piano roll of a pretty_midi object between two pitches."""
    # Use librosa's specshow function for displaying the piano roll
    librosa.display.specshow(midi.get_piano_roll(fs)[start_pitch:end_pitch],
                             hop_length=1, sr=fs, x_axis='time', y_axis='cqt_note',
                             fmin=pm.note_number_to_hz(start_pitch))


def get_losses_from_history(model_path):
    """Loads the final total, reconstruction, and KL losses from the model's training history."""
    model_name = model_path.split('/')[-1]
    with open('saved_model/history/' + model_name + '.json') as json_data:
        d = json.load(json_data)
    best_loss = d['loss'][-1]
    best_reconstruction_loss = d['recon. loss'][-1]
    best_KL_loss = d['kl loss'][-1]
    return best_loss, best_reconstruction_loss, best_KL_loss
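
# The history JSON is assumed to hold one value per epoch under the keys read
# above, roughly:
#
#   {"loss": [...], "recon. loss": [...], "kl loss": [...]}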


def metrics(model, batch_path):
    """Computes reconstruction MSE and per-cell binary accuracy on a preprocessed batch."""
    true_data = np.load(batch_path)
    true_data = tf.transpose(true_data, perm=[0, 2, 3, 1]).numpy()  # channels-first -> channels-last
    pred = model(true_data)[0].numpy()
    pred = (pred > .5).astype(np.int32)  # binarize the reconstruction
    mse = np.mean(np.square(true_data - pred))
    correct_predictions = np.sum(true_data == pred)
    total_predictions = true_data.size
    acc = correct_predictions / total_predictions
    return mse, acc
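
# Usage sketch (assumes a preprocessed batch saved as a channels-first .npy
# array, as produced by the repo's preprocessing):
#
#   mse, acc = metrics(model, 'preprocessed/chroma_rolls_batch_0.npy')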


def parse_arguments():
    parser = argparse.ArgumentParser()
    parser.add_argument("-f", type=str, help="pitches or chroma", default="chroma")
    parser.add_argument("-model", type=str, help="model file name", default="vae-default")
    return parser.parse_args()


if __name__ == "__main__":
    args = parse_arguments()
    # if args.f == "pitches":
    #     processing = get_data_from_midi
    # elif args.f == "chroma":
    #     processing = get_chroma_from_midi
    model_path = "saved_model/" + args.model
    model = tf.keras.models.load_model(model_path)
    losses = get_losses_from_history(model_path)
    model.summary()
    print("Best losses from training:")
    print("loss: {}".format(losses[0]))
    print("recon. loss: {}".format(losses[1]))
    print("KL loss: {}".format(losses[2]))
    print()
    test_midi_file0 = 'data/dancing_queen.mid'
    test_midi_file1 = 'data/africa.mid'
    test_midi_file2 = 'data/wake_me_up.mid'
    test_midi_file3 = 'data/fly_me_to_the_moon.mid'
    test_midi_file4 = 'data/major.mid'
    test_midi_file5 = 'data/minor.mid'
    # predict_and_write_midi(model, test_midi_file0)
    # predict_and_write_midi(model, test_midi_file1)
    predict_and_write_midi(model, test_midi_file4)
    predict_and_write_midi(model, test_midi_file5)
    interpolate_by_average(model, test_midi_file4, test_midi_file5, .5)
    # interpolate_by_steps(model, test_midi_file0, test_midi_file1, 5)
    print(metrics(model, 'preprocessed/chroma_rolls_batch_0.npy'))