Version of Python¶
!python -V
Python 3.12.6
Import Required Packages¶
# Suppress warnings
import warnings
for warn in [UserWarning, FutureWarning]:
    warnings.filterwarnings("ignore", category = warn)
import os
import numpy as np
import torch
import torch.nn as nn
import pandas as pd
import jupyterlab as jlab
Versions of Required Libraries¶
packages = [
"Torch", "NumPy", "Pandas", "JupyterLab",
]
package_objects = [
torch, np, pd, jlab
]
versions = list(map(lambda obj: obj.__version__, package_objects))
pkgs = {"Package": packages, "Version": versions}
df_pkgs = pd.DataFrame(data = pkgs)
df_pkgs.index.name = "#"
df_pkgs.index += 1
display(df_pkgs)
path_to_reqs = "."
reqs_name = "requirements.txt"
def get_packages_and_versions():
"""Generate strings with libraries and their versions in the format: package==version"""
for package, version in zip(packages, versions):
yield f"{package.lower()}=={version}\n"
with open(os.path.join(path_to_reqs, reqs_name), "w", encoding = "utf-8") as f:
f.writelines(get_packages_and_versions())
| # | Package | Version |
|---|---|---|
| 1 | Torch | 2.2.2 |
| 2 | NumPy | 1.26.4 |
| 3 | Pandas | 2.2.3 |
| 4 | JupyterLab | 4.2.5 |
ReLU (Rectified Linear Unit)¶
Replace all negative values with 0, leaving positive values unchanged
$$ \text{ReLU}(x) = \begin{cases} 0 & \text{if } x \leq 0 \\ x & \text{if } x > 0 \end{cases} $$
# Create a ReLU activation object
g = nn.ReLU()
# Create a random tensor
input = torch.tensor([-2.3526, 1.1458]) # torch.randn(2)
# Apply the ReLU activation function to the input data
output = g(input)
# Print the input and output data
print("Input data:", input)
print("Output data:", output)
Input data: tensor([-2.3526, 1.1458])
Output data: tensor([0.0000, 1.1458])
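As a quick cross-check (a minimal sketch reusing the same input values), the functional form torch.nn.functional.relu and a plain clamp at zero give the same result as the nn.ReLU() module:
import torch.nn.functional as F
x = torch.tensor([-2.3526, 1.1458])
# ReLU via the functional API and via an explicit clamp at zero
print(F.relu(x))                 # tensor([0.0000, 1.1458])
print(torch.clamp(x, min = 0.0)) # tensor([0.0000, 1.1458])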
ELU (Exponential Linear Unit)¶
Convert negative values to $\alpha (\exp(x) - 1)$, giving a smoother transition around zero than ReLU, while leaving positive values unchanged
$$ \text{ELU}(x) = \begin{cases} x & \text{if } x > 0 \\ \alpha (\exp(x) - 1) & \text{if } x \leq 0 \end{cases} $$
# Create an ELU activation object
g = nn.ELU(alpha = 1.0)
# Create a random tensor
input = torch.tensor([-2.3526, 1.1458]) # torch.randn(2)
# Apply the ELU activation function to the input data
output = g(input)
# Print the input and output data
print("Input data:", input)
print("Output data:", output)
Input data: tensor([-2.3526, 1.1458])
Output data: tensor([-0.9049, 1.1458])
PReLU (Parametric ReLU)¶
Convert values by leaving positive values unchanged and multiplying negative values by $\alpha$, where $\alpha$ is a learnable parameter
$$ \text{PReLU}(x) = \begin{cases} x & \text{if } x \geq 0 \\ \alpha x & \text{if } x < 0 \end{cases} $$
# Create a PReLU activation object
g = nn.PReLU(num_parameters = 4, init = 0.25)
# Create a random tensor
input = torch.tensor([
[ 0.6465, -0.9450, -0.5559, -1.5250],
[-1.4968, -1.1030, 0.5872, -0.7036]
]) # torch.randn(2, 4)
# Apply the PReLU activation function to the input data
output = g(input)
# Print the input and output data
print("Input data:", input)
print("Output data:", output)
Input data: tensor([[ 0.6465, -0.9450, -0.5559, -1.5250],
        [-1.4968, -1.1030, 0.5872, -0.7036]])
Output data: tensor([[ 0.6465, -0.2362, -0.1390, -0.3812],
        [-0.3742, -0.2758, 0.5872, -0.1759]], grad_fn=<PreluKernelBackward0>)
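The slopes $\alpha$ of nn.PReLU are ordinary module parameters (here one per channel), so they can be inspected and are updated by the optimizer during training; a minimal check reusing the g object created above:
# The learnable per-channel slopes, all initialized to init = 0.25
print(g.weight)       # Parameter containing: tensor([0.2500, 0.2500, 0.2500, 0.2500], requires_grad=True)
print(g.weight.shape) # torch.Size([4])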
LeakyReLU¶
Convert values by leaving positive values unchanged and multiplying negative values by the fixed factor $\alpha$, where $\alpha = \text{negative\_slope}$
$$ \text{LeakyReLU}(x) = \begin{cases} x & \text{if } x \geq 0 \\ \alpha x & \text{if } x < 0 \end{cases} $$
# Create a LeakyReLU activation object
g = nn.LeakyReLU(negative_slope = 0.01)
# Create a random tensor
input = torch.tensor([-2.3526, 1.1458]) # torch.randn(2)
# Apply the LeakyReLU activation function to the input data
output = g(input)
# Print the input and output data
print("Input data:", input)
print("Output data:", output)
Input data: tensor([-2.3526, 1.1458])
Output data: tensor([-0.0235, 1.1458])
ReLU6¶
Convert values less than 0 to 0, greater than 6 to 6, and leave other values unchanged
$$ \text{ReLU6}(x) = \min(\max(0, x), 6) $$
# Create a ReLU6 activation object
g = nn.ReLU6()
# Create a random tensor
input = torch.tensor([-2.3526, 1.1458, 6.2345]) # torch.randn(3)
# Apply the ReLU6 activation function to the input data
output = g(input)
# Print the input and output data
print("Input data:", input)
print("Output data:", output)
Input data: tensor([-2.3526, 1.1458, 6.2345])
Output data: tensor([0.0000, 1.1458, 6.0000])
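ReLU6 is simply the input clamped to the interval $[0, 6]$; a minimal cross-check reusing the same input values:
x = torch.tensor([-2.3526, 1.1458, 6.2345])
# Clamping to [0, 6] reproduces the nn.ReLU6() output
print(torch.clamp(x, min = 0.0, max = 6.0)) # tensor([0.0000, 1.1458, 6.0000])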
RReLU (Randomized Leaky ReLU)¶
Convert values by leaving positive values unchanged and multiplying negative values by a random factor $\alpha$, sampled uniformly from $[\text{lower}, \text{upper}]$ during training
$$ \text{RReLU}(x) = \begin{cases} x & \text{if } x \geq 0 \\ \alpha x & \text{if } x < 0 \end{cases} $$
# Create an RReLU activation object
g = nn.RReLU(lower = 0.125, upper = 0.333)
# Create a random tensor
input = torch.tensor([-2.3526, 1.1458, 6.2345]) # torch.randn(3)
# Apply the RReLU activation function to the input data
output = g(input)
# Print the input and output data
print("Input data:", input)
print("Output data:", output)
Input data: tensor([-2.3526, 1.1458, 6.2345])
Output data: tensor([-0.7478, 1.1458, 6.2345])
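Note that the random factor is resampled on every call only in training mode; in evaluation mode the slope is fixed at $(lower + upper) / 2$, so the output becomes deterministic. A minimal sketch reusing the g object created above:
x = torch.tensor([-2.3526, 1.1458, 6.2345])
# Training mode: the negative slope is sampled from U(lower, upper) on every call
g.train()
print(g(x), g(x)) # the negative element may differ between the two calls
# Evaluation mode: the slope is fixed at (0.125 + 0.333) / 2 = 0.229
g.eval()
print(g(x)) # the negative element is scaled by 0.229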
SELU (Scaled Exponential Linear Unit)¶
Convert values by scaling positive values with the $\text{scale}$ parameter and transforming negative values with both the $\alpha$ and $\text{scale}$ parameters, where $\text{scale} \approx 1.0507$ and $\alpha \approx 1.6733$
$$ \text{SELU}(x) = \text{scale} \times (\max(0, x) + \min(0, \alpha \times (\exp(x) - 1))) $$
# Create a SELU activation object
g = nn.SELU()
# Create a random tensor
input = torch.tensor([-2.3526, 1.1458]) # torch.randn(2)
# Apply the SELU activation function to the input data
output = g(input)
# Print the input and output data
print("Input data:", input)
print("Output data:", output)
Input data: tensor([-2.3526, 1.1458])
Output data: tensor([-1.5909, 1.2039])
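Because the SELU constants are fixed ($\text{scale} \approx 1.0507$, $\alpha \approx 1.6733$), the output can be reproduced directly from the formula; a minimal sketch with the same input values (the constants below are assumed from the SELU definition):
scale = 1.0507009873554805
alpha = 1.6732632423543772
x = torch.tensor([-2.3526, 1.1458])
# max(0, x) and min(0, alpha * (exp(x) - 1)) expressed via clamp
manual = scale * (torch.clamp(x, min = 0.0) + torch.clamp(alpha * (torch.exp(x) - 1), max = 0.0))
print(manual) # tensor([-1.5909, 1.2039])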
CELU (Continuously Differentiable Exponential Linear Unit)¶
Convert negative values using the exponential function and the $\alpha$ parameter, leaving positive values unchanged
$$ \text{CELU}(x) = \begin{cases} x & \text{if } x \geq 0 \\ \alpha \times (\exp(x / \alpha) - 1) & \text{if } x < 0 \end{cases} $$
# Create a CELU activation object
g = nn.CELU(alpha = 1.0)
# Create a random tensor
input = torch.tensor([-2.3526, 1.1458]) # torch.randn(2)
# Apply the CELU activation function to the input data
output = g(input)
# Print the input and output data
print("Input data:", input)
print("Output data:", output)
Input data: tensor([-2.3526, 1.1458])
Output data: tensor([-0.9049, 1.1458])
GELU (Gaussian Error Linear Unit)¶
Transform values by weighting them with the cumulative distribution function of the standard normal distribution
$$ \text{GELU}(x) = x \times \Phi(x) \quad \text{where } \Phi(x) \text{ is the cumulative distribution function of the standard normal distribution } (\text{approximate} = \text{none}) $$
$$ \text{GELU}(x) = 0.5 \times x \times \left(1 + \tanh\left(\sqrt{\frac{2}{\pi}} \left(x + 0.044715 x^3\right)\right)\right) \quad (\text{approximate} = \text{tanh}) $$
# Create a GELU activation object
g = nn.GELU(approximate = "none") # none | tanh
# Create a random tensor
input = torch.tensor([-2.3526, 1.1458]) # torch.randn(2)
# Apply the GELU activation function to the input data
output = g(input)
# Print the input and output data
print("Input data:", input)
print("Output data:", output)
Input data: tensor([-2.3526, 1.1458])
Output data: tensor([-0.0219, 1.0015])
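The two variants can be compared directly; a minimal sketch reusing the same input values, with the tanh approximation also computed by hand from the formula above:
import math
x = torch.tensor([-2.3526, 1.1458])
exact = nn.GELU(approximate = "none")(x)
approx = nn.GELU(approximate = "tanh")(x)
# The tanh approximation written out explicitly
manual = 0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * x ** 3)))
print(exact)  # tensor([-0.0219, 1.0015])
print(approx) # very close to the exact variant
print(manual) # matches the approximate = "tanh" output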
Sigmoid¶
Convert values to the range from 0 to 1 using the logistic function
$$ \text{Sigmoid}(x) = \sigma(x) = \frac{1}{1 + \exp(-x)} $$
# Create a Sigmoid activation object
g = nn.Sigmoid()
# Create a random tensor
input = torch.tensor([-2.3526, 1.1458]) # torch.randn(2)
# Apply the Sigmoid activation function to the input data
output = g(input)
# Print the input and output data
print("Input data:", input)
print("Output data:", output)
Input data: tensor([-2.3526, 1.1458])
Output data: tensor([0.0869, 0.7587])
SiLU (Sigmoid Linear Unit)¶
Multiply the input values $x$ by their sigmoid transform $\sigma(x) = \frac{1}{1 + e^{-x}}$
$$ \text{SiLU}(x) = x \times \sigma(x) \quad \text{where } \sigma(x) = \frac{1}{1 + e^{-x}} \text{ is the sigmoid function} $$
# Create a SiLU activation object
g = nn.SiLU()
# Create a random tensor
input = torch.tensor([-2.3526, 1.1458]) # torch.randn(2)
# Apply the SiLU activation function to the input data
output = g(input)
# Print the input and output data
print("Input data:", input)
print("Output data:", output)
Input data: tensor([-2.3526, 1.1458])
Output data: tensor([-0.2043, 0.8694])
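SiLU (also known as swish) is just the input multiplied by its own sigmoid, so it is easy to verify by hand; a minimal sketch with the same input values:
x = torch.tensor([-2.3526, 1.1458])
# Multiply the input by its sigmoid
print(x * torch.sigmoid(x)) # tensor([-0.2043, 0.8694])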
LogSigmoid¶
Convert values using the logarithm of a sigmoid function
$$ \text{LogSigmoid}(x) = \log \left( \frac{1}{1 + \exp(-x)} \right) $$
# Create a LogSigmoid activation object
g = nn.LogSigmoid()
# Create a random tensor
input = torch.tensor([-2.3526, 1.1458]) # torch.randn(2)
# Apply the LogSigmoid activation function to the input data
output = g(input)
# Print the input and output data
print("Input data:", input)
print("Output data:", output)
Input data: tensor([-2.3526, 1.1458])
Output data: tensor([-2.4435, -0.2761])
Hardsigmoid¶
Convert values less than or equal to $-3$ to 0, values greater than or equal to $3$ to 1, and values between $-3$ and $3$ to $\frac{x}{6} + 0.5$
$$ \text{Hardsigmoid}(x) = \begin{cases} 0 & \text{if } x \leq -3 \\ 1 & \text{if } x \geq 3 \\ \frac{x}{6} + 0.5 & \text{otherwise} \end{cases} $$
# Create a Hardsigmoid activation object
g = nn.Hardsigmoid()
# Create a random tensor
input = torch.tensor([-3.3526, 3.1458, -2.0256, 1.7843]) # torch.randn(4)
# Apply the Hardsigmoid activation function to the input data
output = g(input)
# Print the input and output data
print("Input data:", input)
print("Output data:", output)
Input data: tensor([-3.3526, 3.1458, -2.0256, 1.7843])
Output data: tensor([0.0000, 1.0000, 0.1624, 0.7974])
Tanh¶
Convert values to the range from -1 to 1
$$ \text{Tanh}(x) = \tanh(x) = \frac{\exp(x) - \exp(-x)}{\exp(x) + \exp(-x)} $$
# Create a Tanh activation object
g = nn.Tanh()
# Create a random tensor
input = torch.tensor([-2.3526, 1.1458]) # torch.randn(2)
# Apply the Tanh activation function to the input data
output = g(input)
# Print the input and output data
print("Input data:", input)
print("Output data:", output)
Input data: tensor([-2.3526, 1.1458])
Output data: tensor([-0.9821, 0.8164])
Tanhshrink¶
Transform values by subtracting the hyperbolic tangent $\tanh(x)$ from the input values themselves
$$ \text{Tanhshrink}(x) = x - \tanh(x) $$
# Create a Tanhshrink activation object
g = nn.Tanhshrink()
# Create a random tensor
input = torch.tensor([-2.3526, 1.1458]) # torch.randn(2)
# Apply the Tanhshrink activation function to the input data
output = g(input)
# Print the input and output data
print("Input data:", input)
print("Output data:", output)
Input data: tensor([-2.3526, 1.1458])
Output data: tensor([-1.3705, 0.3294])
Hardtanh¶
Convert values less than $min\_val$ to $min\_val$, values greater than $max\_val$ to $max\_val$, and leave values between $min\_val$ and $max\_val$ unchanged
$$ \text{HardTanh}(x) = \begin{cases} min\_val & \text{if } x < min\_val \\ max\_val & \text{if } x > max\_val \\ x & \text{otherwise} \end{cases} $$
# Create a Hardtanh activation object
g = nn.Hardtanh(min_val = -1.0, max_val = 1.0)
# Create a random tensor
input = torch.tensor([-1.1383, 1.1630, -0.8715, 0.7228]) # torch.randn(4)
# Apply the Hardtanh activation function to the input data
output = g(input)
# Print the input and output data
print("Input data:", input)
print("Output data:", output)
Input data: tensor([-1.1383, 1.1630, -0.8715, 0.7228])
Output data: tensor([-1.0000, 1.0000, -0.8715, 0.7228])
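Hardtanh is equivalent to clamping the input to $[min\_val, max\_val]$; a minimal cross-check with the same input values:
x = torch.tensor([-1.1383, 1.1630, -0.8715, 0.7228])
# Clamping to [-1, 1] reproduces the nn.Hardtanh output
print(torch.clamp(x, min = -1.0, max = 1.0)) # tensor([-1.0000, 1.0000, -0.8715, 0.7228])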
Hardshrink¶
Convert values between $-\lambda$ and $\lambda$ inclusive to 0, and leave values less than $-\lambda$ or greater than $\lambda$ unchanged
$$ \text{HardShrink}(x) = \begin{cases} x & \text{if } x > \lambda \\ x & \text{if } x < -\lambda \\ 0 & \text{otherwise} \end{cases} $$
# Create a Hardshrink activation object
g = nn.Hardshrink(lambd = 1.1458)
# Create a random tensor
input = torch.tensor([-2.3526, 1.1458]) # torch.randn(2)
# Apply the Hardshrink activation function to the input data
output = g(input)
# Print the input and output data
print("Input data:", input)
print("Output data:", output)
Input data: tensor([-2.3526, 1.1458])
Output data: tensor([-2.3526, 0.0000])
Hardswish¶
Convert values less than or equal to $-3$ to 0, leave values greater than or equal to $3$ unchanged, and convert values between $-3$ and $3$ using the function $x \left(\frac{x+3}{6}\right)$
$$ \text{Hardswish}(x) = \begin{cases} 0 & \text{if } x \leq -3 \\ x & \text{if } x \geq 3 \\ x \left(\frac{x+3}{6}\right) & \text{otherwise} \end{cases} $$
# Create a Hardswish activation object
g = nn.Hardswish()
# Create a random tensor
input = torch.tensor([-3.3526, 3.1458, -2.0256, 1.7843]) # torch.randn(4)
# Apply the Hardswish activation function to the input data
output = g(input)
# Print the input and output data
print("Input data:", input)
print("Output data:", output)
Input data: tensor([-3.3526, 3.1458, -2.0256, 1.7843])
Output data: tensor([-0.0000, 3.1458, -0.3290, 1.4228])
Mish¶
Convert values by multiplying $x$ by the hyperbolic tangent of its $\text{Softplus}$ transform ($\text{Softplus}(x) = \frac{1}{\beta} \log(1 + \exp(\beta \times x))$)
$$ \text{Mish}(x) = x \times \tanh(\text{Softplus}(x)) $$
# Create a Mish activation object
g = nn.Mish()
# Create a random tensor
input = torch.tensor([-2.3526, 1.1458]) # torch.randn(2)
# Apply the Mish activation function to the input data
output = g(input)
# Print the input and output data
print("Input data:", input)
print("Output data:", output)
Input data: tensor([-2.3526, 1.1458])
Output data: tensor([-0.2132, 1.0198])
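Mish can be reproduced from its definition using the softplus and tanh functions; a minimal sketch with the same input values:
import torch.nn.functional as F
x = torch.tensor([-2.3526, 1.1458])
# x multiplied by the tanh of its softplus transform
print(x * torch.tanh(F.softplus(x))) # tensor([-0.2132, 1.0198])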
Softplus¶
Smoothly approximate the ReLU function, with the sharpness of the transition between the near-zero and linear regions controlled by the $\beta$ parameter
$$ \text{Softplus}(x) = \frac{1}{\beta} \log(1 + \exp(\beta \times x)) $$
# Create a Softplus activation object
g = nn.Softplus(beta = 1.0, threshold = 20.0)
# Create a random tensor
input = torch.tensor([-2.3526, 1.1458]) # torch.randn(2)
# Apply the Softplus activation function to the input data
output = g(input)
# Print the input and output data
print("Input data:", input)
print("Output data:", output)
Input data: tensor([-2.3526, 1.1458])
Output data: tensor([0.0909, 1.4219])
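The threshold parameter exists for numerical stability: when $\beta \times x$ exceeds it, the implementation switches to the identity function so that $\exp(\beta \times x)$ is never evaluated for large inputs. A minimal sketch (the value 25.0 is an arbitrary example above the threshold):
g = nn.Softplus(beta = 1.0, threshold = 20.0)
x = torch.tensor([1.1458, 25.0])
# Values with beta * x > threshold pass through essentially unchanged
print(g(x))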
Softshrink¶
Shrink values toward zero by $\lambda$ and set them to zero if their absolute value does not exceed $\lambda$
$$ \text{Softshrink}(x) = \begin{cases} x - \lambda & \text{if } x > \lambda \\ x + \lambda & \text{if } x < -\lambda \\ 0 & \text{otherwise} \end{cases} $$
# Create a Softshrink activation object
g = nn.Softshrink(lambd = 0.5)
# Create a random tensor
input = torch.tensor([-2.3526, 1.1458, 0.4320, -0.3791]) # torch.randn(4)
# Apply the Softshrink activation function to the input data
output = g(input)
# Print the input and output data
print("Input data:", input)
print("Output data:", output)
Input data: tensor([-2.3526, 1.1458, 0.4320, -0.3791])
Output data: tensor([-1.8526, 0.6458, 0.0000, 0.0000])
Softsign¶
Transform values by dividing them by one plus their absolute value, squashing the output into the range from -1 to 1
$$ \text{Softsign}(x) = \frac{x}{1 + |x|} $$
# Create a Softsign activation object
g = nn.Softsign()
# Create a random tensor
input = torch.tensor([-2.3526, 1.1458, 0.4320, -0.3791]) # torch.randn(4)
# Apply the Softsign activation function to the input data
output = g(input)
# Print the input and output data
print("Input data:", input)
print("Output data:", output)
Input data: tensor([-2.3526, 1.1458, 0.4320, -0.3791])
Output data: tensor([-0.7017, 0.5340, 0.3017, -0.2749])
Threshold¶
Convert values by leaving those greater than the specified $threshold$ unchanged and replacing all other values with the specified $value$
$$ \text{Threshold}(x) = \begin{cases} x & \text{if } x > threshold \\ value & \text{otherwise} \end{cases} $$
# Create a Threshold activation object
g = nn.Threshold(threshold = 0.1, value = 20)
# Create a random tensor
input = torch.tensor([-2.3526, 1.1458, 0.4320, -0.3791]) # torch.randn(4)
# Apply the Threshold activation function to the input data
output = g(input)
# Print the input and output data
print("Input data:", input)
print("Output data:", output)
Input data: tensor([-2.3526, 1.1458, 0.4320, -0.3791])
Output data: tensor([20.0000, 1.1458, 0.4320, 20.0000])
GLU (Gated Linear Unit)¶
Transform values by splitting the input into two halves along the specified dimension, leaving the first half unchanged, applying a sigmoid to the second half, and multiplying the two halves element-wise
$$ \text{GLU}(x) = x_1 \times \sigma(x_2) $$
# Create a GLU activation object
g = nn.GLU(dim = -1)
# Create a random tensor
input = torch.tensor([
[-0.0915, 0.2352],
[ 2.2440, 0.5817],
[ 0.4528, 0.6410],
[ 0.5200, 0.5567]
]) # torch.randn(4, 2)
# Apply the GLU activation function to the input data
output = g(input)
# Print the input and output data
print("Input data:", input)
print("Output data:", output)
Input data: tensor([[-0.0915, 0.2352],
        [ 2.2440, 0.5817],
        [ 0.4528, 0.6410],
        [ 0.5200, 0.5567]])
Output data: tensor([[-0.0511],
        [ 1.4394],
        [ 0.2966],
        [ 0.3306]])
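The gating can be reproduced by hand by splitting the tensor with chunk and applying the sigmoid to the second half; a minimal sketch reusing the input tensor from above:
# Split the last dimension into two halves and gate the first by the sigmoid of the second
a, b = input.chunk(2, dim = -1)
print(a * torch.sigmoid(b)) # matches the nn.GLU(dim = -1) output above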
MultiheadAttention¶
Convert values using a multi-head attention mechanism, where the $query$, $key$, and $value$ inputs are projected into multiple attention heads, the head outputs are concatenated, and a final linear transformation produces the result
$$ \text{MultiHead}(Q, K, V) = \text{Concat}(\text{head}_1, \dots, \text{head}_h) W^O \quad \text{where } \text{head}_i = \text{Attention}(Q W_i^Q, K W_i^K, V W_i^V) $$
# Parameters
embed_dim = 3 # Embed dimension
num_heads = 1 # Number of heads
# Create a MultiheadAttention object
multihead_attn = nn.MultiheadAttention(
embed_dim = embed_dim,
num_heads = num_heads,
dropout = 0.0,
bias = True,
add_bias_kv = False,
batch_first = True
)
# Create random tensors for query, key and value
query = torch.tensor([
[
[-1.4025, 0.4318, 0.3431],
[ 1.0711, 1.3455, 0.3277]],
[
[ 1.3409, 1.2159, 0.9589],
[ 0.5137, 0.4977, -0.6646]],
[
[-1.0612, 2.0423, 0.6509],
[-1.0072, 0.3578, -1.0799]
]
]) # (batch size, target sequence length, embed_dim)
key = torch.tensor([
[
[-1.4025, 0.4318, 0.3431],
[ 1.0711, 1.3455, 0.3277]],
[
[ 1.3409, 1.2159, 0.9589],
[ 0.5137, 0.4977, -0.6646]],
[
[-1.0612, 2.0423, 0.6509],
[-1.0072, 0.3578, -1.0799]
]
]) # (batch size, source sequence length, embed_dim)
value = torch.tensor([
[
[-1.4025, 0.4318, 0.3431],
[ 1.0711, 1.3455, 0.3277]],
[
[ 1.3409, 1.2159, 0.9589],
[ 0.5137, 0.4977, -0.6646]],
[
[-1.0612, 2.0423, 0.6509],
[-1.0072, 0.3578, -1.0799]
]
]) # (batch size, source sequence length, embed_dim)
# Apply the MultiheadAttention activation function to the input data
# attn_output = (batch size, target sequence length, embed_dim)
# attn_output_weights = (batch size, target sequence length, source sequence length)
attn_output, attn_output_weights = multihead_attn(query, key, value)
# Print the output data
print("Attention result:", attn_output)
print("Attention Weights:", attn_output_weights)
Attention result: tensor([[[ 0.2210, 0.3523, -0.3251],
         [ 0.2292, 0.4314, -0.3232]],
        [[ 0.0753, -0.0731, -0.1695],
         [ 0.0983, -0.0579, -0.1872]],
        [[-0.2672, 0.2664, 0.0135],
         [-0.0246, 0.5786, -0.2260]]], grad_fn=<TransposeBackward0>)
Attention Weights: tensor([[[0.3609, 0.6391],
         [0.4675, 0.5325]],
        [[0.4914, 0.5086],
         [0.5231, 0.4769]],
        [[0.1311, 0.8689],
         [0.3989, 0.6011]]], grad_fn=<MeanBackward1>)
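Each row of the returned attention weights is a softmax over the source positions, so it sums to 1 for every batch element and target position; a minimal check reusing the attn_output_weights tensor from above:
# Sum over the source sequence dimension: a (batch size, target sequence length) tensor
# in which every entry is (numerically) equal to 1
print(attn_output_weights.sum(dim = -1))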