initial commit

willGuimont · willGuimont · commit 220e9385f463 · 2021-12-14T12:54:27.000-05:00
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,2 @@
+/venv/
+/.idea/
diff --git a/learnable_fourier_pos_encoding.py b/learnable_fourier_pos_encoding.py
@@ -0,0 +1,68 @@
+import numpy as np
+import torch
+import torch.nn as nn
+
+
+class LearnableFourierPositionalEncoding(nn.Module):
+    def __init__(self, G: int, M: int, F_dim: int, H_dim: int, D: int, gamma: float):
+        """
+        Learnable Fourier Features from https://arxiv.org/pdf/2106.02795.pdf (Algorithm 1)
+        Implementation of Algorithm 1: Compute the Fourier feature positional encoding of a multi-dimensional position
+        Computes the positional encoding of a tensor of shape [N, G, M]
+        :param G: positional groups (positions in different groups are independent)
+        :param M: each point has a M-dimensional positional values
+        :param F_dim: depth of the Fourier feature dimension
+        :param H_dim: hidden layer dimension
+        :param D: positional encoding dimension
+        :param gamma: parameter to initialize Wr
+        """
+        super().__init__()
+        self.G = G
+        self.M = M
+        self.F_dim = F_dim
+        self.H_dim = H_dim
+        self.D = D
+        self.gamma = gamma
+
+        # Projection matrix on learned lines (used in eq. 2)
+        self.Wr = nn.Linear(self.M, self.F_dim // 2, bias=False)
+        # MLP (GeLU(F @ W1 + B1) @ W2 + B2 (eq. 6)
+        self.mlp = nn.Sequential(
+            nn.Linear(self.F_dim, self.H_dim, bias=True),
+            nn.GELU(),
+            nn.Linear(self.H_dim, self.D // self.G)
+        )
+
+        self.init_weights()
+
+    def init_weights(self):
+        nn.init.normal_(self.Wr.weight.data, mean=0, std=self.gamma ** -2)
+
+    def forward(self, x):
+        """
+        Produce positional encodings from x
+        :param x: tensor of shape [N, G, M] that represents N positions where each position is in the shape of [G, M],
+                  where G is the positional group and each group has M-dimensional positional values.
+                  Positions in different positional groups are independent
+        :return: positional encoding for X
+        """
+        N, G, M = x.shape
+        # Step 1. Compute Fourier features (eq. 2)
+        projected = self.Wr(x)
+        cosines = torch.cos(projected)
+        sines = torch.sin(projected)
+        F = 1 / np.sqrt(self.F_dim) * torch.cat([cosines, sines], dim=-1)
+        # Step 2. Compute projected Fourier features (eq. 6)
+        Y = self.mlp(F)
+        # Step 3. Reshape to x's shape
+        PEx = Y.reshape((N, self.D))
+        return PEx
+
+
+if __name__ == '__main__':
+    G = 3
+    M = 17
+    x = torch.randn((97, G, M))
+    enc = LearnableFourierPositionalEncoding(G, M, 768, 32, 768, 10)
+    pex = enc(x)
+    print(pex.shape)
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,2 @@
+numpy
+torch