gr-cls-multitask-v2/process_data.py at main · billy-enrizky/gr-cls-multitask-v2 · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import numpy as np
import scipy.io
from scipy.signal import butter, filtfilt, sosfilt, sosfiltfilt
# Band-pass filter design helper
def butter_bandpass(lowcut, highcut, fs, order=5):
    """Design a Butterworth band-pass filter in transfer-function form.

    Args:
        lowcut: Lower cutoff frequency, in the same units as ``fs``.
        highcut: Upper cutoff frequency, in the same units as ``fs``.
        fs: Sampling frequency.
        order: Order handed to ``scipy.signal.butter``.

    Returns:
        The ``(b, a)`` numerator and denominator coefficient arrays.
    """
    # scipy expects the band edges as fractions of the Nyquist frequency.
    half_rate = 0.5 * fs
    band_edges = [edge / half_rate for edge in (lowcut, highcut)]
    numerator, denominator = butter(order, band_edges, btype='band')
    return numerator, denominator

def apply_bandpass_filter(data, lowcut, highcut, fs, order=4):
    """Apply a zero-phase Butterworth band-pass filter along axis 1 of ``data``.

    The filter is designed as second-order sections and run forward and
    backward with ``sosfiltfilt``. The SOS form is used instead of ``(b, a)``
    transfer-function coefficients because the latter lose precision for
    band-pass designs, particularly when the lower cutoff is a tiny fraction
    of the Nyquist frequency.

    Args:
        data: Array with at least two dimensions; axis 1 is the axis that
            gets filtered (the time dimension for the callers in this file).
        lowcut: Lower cutoff frequency, in the same units as ``fs``.
        highcut: Upper cutoff frequency, in the same units as ``fs``.
        fs: Sampling frequency.
        order: Butterworth order passed to ``scipy.signal.butter``.

    Returns:
        The filtered array, same shape as ``data``.
    """
    nyquist = 0.5 * fs
    sos = butter(
        order,
        [lowcut / nyquist, highcut / nyquist],
        btype='band',
        output='sos',
    )
    # Forward-backward filtering cancels the phase shift of a single pass.
    return sosfiltfilt(sos, data, axis=1)

def get_data(task, avr=False, participant="9"):
    """Load per-object CSV trials, band-pass filter them and average over trials.

    For each category and each object index 1..5 this reads
    ``data/<category>_<participant>_<task><index>.csv``, where every row is one
    trial flattened from a ``(307, 64)`` block, filters each trial along the
    307-sample axis and averages across trials.

    Args:
        task: Task tag used in the CSV file names.
        avr: Unused; kept so existing callers keep working.
        participant: Participant tag used in the CSV file names. Defaults to
            ``"9"``, the participant that was previously hard-coded.

    Returns:
        Dict mapping each category name to a list of five arrays of shape
        ``(1, 307, 64)`` (one trial-averaged array per object).
    """
    lowcut = 0.2  # Low cutoff frequency (Hz)
    highcut = 115  # High cutoff frequency (Hz)
    fs = 512  # Sampling frequency (Hz), adapt this to your actual EEG data sampling rate
    order = 4  # Filter order (typically between 3-5)
    # Layout of one flattened CSV row. NOTE(review): assumed to be
    # (timepoints, channels), matching the axis names get_data_matlab uses.
    n_timepoints, n_channels = 307, 64
    categories = ['figurine', 'pen', 'chair', 'lamp', 'plant']
    all_data = {}
    for category in categories:
        per_object = []
        for i in range(1, 6):
            # ndmin=2 keeps a single-trial file as one row instead of a 1-D array.
            data = np.loadtxt(
                'data/%s_%s_%s%s.csv' % (category, participant, task, i),
                delimiter=',',
                ndmin=2,
            )
            # Reshape BEFORE filtering so that axis 1 is the 307-sample axis.
            # Filtering the flat row would run the filter over the flattened
            # 307*64 axis and mix values from the two dimensions.
            data = data.reshape(data.shape[0], n_timepoints, n_channels)
            data = apply_bandpass_filter(data, lowcut, highcut, fs, order=order)
            # Average across trials, keeping a leading singleton axis.
            per_object.append(np.mean(data, axis=0)[None, :, :])
        all_data[category] = per_object
    return all_data

def get_data_matlab(task, avr=False):
    """Load per-participant MATLAB ERP files and combine them per object.

    Reads ``matlab_files/classification_erps/exp_<i>_<task>.mat`` for
    participants 1..16 except the excluded ones. For every category and object
    index 1..5, each participant's trials are transposed with ``(0, 2, 1)``,
    averaged over the trial axis, and the per-participant averages are then
    summed. No band-pass filtering is applied here.

    NOTE(review): the per-participant averages are summed, not averaged;
    confirm that the sum is the intended combination.

    Args:
        task: Task tag used in the ``.mat`` file names.
        avr: Unused; kept so existing callers keep working.

    Returns:
        Dict mapping each category name to a list of five 2-D arrays (one per
        object), each the sum over participants of the trial-averaged data.
    """
    categories = ['figurine', 'pen', 'chair', 'lamp', 'plant']
    removed_participants = {13, 1, 2, 3, 8, 9}  # add more participants as needed
    dataset = [
        scipy.io.loadmat(f'matlab_files/classification_erps/exp_{i}_{task}.mat')
        for i in range(1, 17)
        if i not in removed_participants
    ]
    all_data = {}
    for category in categories:
        all_data[category] = []
        for i in range(1, 6):
            # One (1, d2, d1) trial-average per participant, where the stored
            # array is (trials, d1, d2) before the transpose.
            per_participant = [
                np.mean(mat[category][f"ob{i}"][0][0].transpose(0, 2, 1), axis=0)[None, :, :]
                for mat in dataset
            ]
            # Stack all participants once, then collapse the participant axis.
            stacked = np.concatenate(per_participant, axis=0)
            all_data[category].append(stacked.sum(axis=0))
    return all_data

# Debug helper: checks whether the CSV export lines up with the MATLAB export
# (when this was written, it did not).
def test_data():
    """Print a comparison of the MATLAB-derived and CSV-derived averages.

    Loads ``exp_8_class.mat`` and the participant-"9" ``cls`` CSV files,
    trial-averages every category/object in both, and prints:

    1. whether the first ``figurine`` object is exactly equal in both sources,
    2. the sum of column 0 of that object from the CSV data,
    3. the sum of column 0 of that object from the MATLAB data.

    NOTE(review): the MATLAB file is ``exp_8`` while the CSV participant tag
    is ``"9"``; confirm these refer to the same recording.

    Returns:
        None. Results are printed.
    """
    categories = ['figurine', 'pen', 'chair', 'lamp', 'plant']

    # MATLAB side: a single file, so the trial average is the final value.
    mat = scipy.io.loadmat('matlab_files/classification_erps/exp_8_class.mat')
    all_data = {}
    for category in categories:
        all_data[category] = []
        for i in range(1, 6):
            data = mat[category][f"ob{i}"][0][0].transpose(0, 2, 1)
            all_data[category].append(np.mean(data, axis=0))

    # CSV side: each row is one trial flattened from a (307, 64) block.
    all_data_other = {}
    participant = "9"
    for category in categories:
        all_data_other[category] = []
        for i in range(1, 6):
            data = np.loadtxt('data/%s_%s_%s%s.csv' % (category, participant, "cls", i), delimiter=',')
            data = data.reshape(data.shape[0], 307, 64)
            # average across trials
            all_data_other[category].append(np.mean(data, axis=0)[None, :, :])

    print(np.array_equal(all_data_other["figurine"][0][0, :, :], all_data["figurine"][0]))
    print(all_data_other["figurine"][0][0][:, 0].sum())
    print(all_data["figurine"][0][:, 0].sum())
# Run the debug comparison only when this file is executed as a script, so
# importing the module for its loaders does not trigger file I/O and prints.
if __name__ == "__main__":
    test_data()