Skip to content

Questions about ANCHORED FEATURE REGULARIZER #7

@iliaohai

Description

@iliaohai

Hi, I'm trying to use the ANCHORED FEATURE REGULARIZER as you suggested, but I've run into a problem: when Batch_size=1, lowerbound_loss is always 0. After checking the code, I found that it is caused by the following code. Can you help me? Thanks.

class InfoNCE(nn.Module):
    """InfoNCE-style lower-bound estimator built on a learned critic.

    The critic ``F_func`` is a two-layer MLP applied to the concatenation of
    an ``x`` sample and a ``y`` sample; its final ``Softplus`` makes every
    score strictly positive.

    Args:
        x_dim: Feature size of each ``x`` sample.
        y_dim: Feature size of each ``y`` sample.
    """

    def __init__(self, x_dim, y_dim):
        super().__init__()
        self.lower_size = 300  # hidden width of the critic MLP
        self.F_func = nn.Sequential(nn.Linear(x_dim + y_dim, self.lower_size),
                                    nn.ReLU(),
                                    nn.Linear(self.lower_size, 1),
                                    nn.Softplus())

    def forward(self, x_samples, y_samples):
        """Return the scalar lower bound for a batch of paired samples.

        Args:
            x_samples: Tensor of shape ``[sample_size, x_dim]``.
            y_samples: Tensor of shape ``[sample_size, y_dim]``; row ``i`` is
                the partner of ``x_samples[i]``.

        Returns:
            A 0-dim tensor ``mean(T0) - (mean(logsumexp(T1, dim=1)) - log(N))``
            with ``N = sample_size``. The value is returned as-is (not
            negated), so a caller that minimises a loss must negate it.

        Note:
            The diagonal of ``T1`` equals ``T0``, so the result can never
            exceed ``log(N)``. With ``sample_size == 1`` the two terms are the
            same single score and ``log(1) == 0``, hence the result is
            identically 0 and carries no training signal; a batch of at least
            two samples is needed for a non-trivial value.
        """
        sample_size = y_samples.shape[0]

        # All (x_j, y_i) combinations. ``expand`` creates broadcast views
        # instead of the copies ``repeat`` would make; ``torch.cat`` below
        # materialises the data exactly once.
        x_tile = x_samples.unsqueeze(0).expand(sample_size, -1, -1)
        y_tile = y_samples.unsqueeze(1).expand(-1, sample_size, -1)

        # Critic scores for the matched pairs: [sample_size, 1].
        T0 = self.F_func(torch.cat([x_samples, y_samples], dim=-1))
        # Critic scores for every pair: [sample_size, sample_size, 1].
        T1 = self.F_func(torch.cat([x_tile, y_tile], dim=-1))

        # logsumexp(.) - log(N) is a numerically stable log-mean-exp over the
        # x axis, i.e. torch.log(T1.exp().mean(dim=1)).
        log_mean_exp = T1.logsumexp(dim=1).mean() - np.log(sample_size)
        return T0.mean() - log_mean_exp

========print==========
used InfoNCE
x_shape
torch.Size([1, 1024])
tensor([[-1.1419, 0.0000, 0.1771, ..., -2.2108, 0.0000, 0.5778]],
device='cuda:0', grad_fn=)
y_samples
torch.Size([1, 1024])
tensor([[-0.1306, -0.1803, -0.0562, ..., -0.0158, -0.0930, -0.0641]],
device='cuda:0', grad_fn=)
T0:
tensor([[0.5524]], device='cuda:0', grad_fn=)
T1:
tensor([[[0.5524]]], device='cuda:0', grad_fn=)
lower_bound:
tensor(0., device='cuda:0', grad_fn=)

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions