Blocks

Basic building blocks to create the Quartznet model.

InitMode (str, Enum)

    Weight init methods. Used by init_weights.

Note:
    Possible values are `xavier_uniform`, `xavier_normal`, `kaiming_uniform` and `kaiming_normal`.

Source code in thunder/quartznet/blocks.py
class InitMode(str, Enum):
    """Weight init methods. Used by [`init_weights`][thunder.quartznet.blocks.init_weights].
    Note:
        Possible values are `xavier_uniform`, `xavier_normal`, `kaiming_uniform` and `kaiming_normal`
    """
    xavier_uniform = "xavier_uniform"
    xavier_normal = "xavier_normal"
    kaiming_uniform = "kaiming_uniform"
    kaiming_normal = "kaiming_normal"
        
MaskedConv1d (Module)

Source code in thunder/quartznet/blocks.py

class MaskedConv1d(nn.Module):
    __constants__ = ["use_mask", "padding", "dilation", "kernel_size", "stride"]
    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: _size_1_t,
        stride: _size_1_t = 1,
        padding: _size_1_t = 0,
        dilation: _size_1_t = 1,
        groups: int = 1,
        bias: bool = False,
        use_mask: bool = True,
    ):
        """Masked Convolution.
        This module correspond to a 1d convolution with input masking. Arguments to create are the
        same as nn.Conv1d, but with the addition of use_mask for special behaviour.
        Args:
            in_channels: Same as nn.Conv1d
            out_channels: Same as nn.Conv1d
            kernel_size: Same as nn.Conv1d
            stride: Same as nn.Conv1d
            padding: Same as nn.Conv1d
            dilation: Same as nn.Conv1d
            groups: Same as nn.Conv1d
            bias: Same as nn.Conv1d
            use_mask: Controls the masking of input before the convolution during the forward.
        """
        super().__init__()
        self.use_mask = use_mask
        self.conv = nn.Conv1d(
            in_channels,
            out_channels,
            kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias=bias,
        )
        self.padding = self.conv.padding[0]
        self.dilation = self.conv.dilation[0]
        self.kernel_size = self.conv.kernel_size[0]
        self.stride = self.conv.stride[0]
    def get_seq_len(self, lengths: torch.Tensor) -> torch.Tensor:
        """Get the lengths of the inputs after the convolution operation is applied.
        Args:
            lengths: Original lengths of the inputs
        Returns:
            Resulting lengths after the convolution
        """
        return (
            torch.div(
                lengths + 2 * self.padding - self.dilation * (self.kernel_size - 1) - 1,
                self.stride,
                rounding_mode="floor",
            )
            + 1
        )
    def mask_fill(self, x: torch.Tensor, lengths: torch.Tensor) -> torch.Tensor:
        """Mask the input based on it's respective lengths.
        Args:
            x: Signal to be processed, of shape (batch, features, time)
            lengths: Lenghts of each element in the batch of x, with shape (batch)
        Returns:
            The masked signal
        """
        mask = lengths_to_mask(lengths, x.shape[-1])
        return x.masked_fill(~mask.unsqueeze(1), 0)
    def forward(
        self, x: torch.Tensor, lengths: torch.Tensor
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """Forward method
        Args:
            x: Signal to be processed, of shape (batch, features, time)
            lengths: Lengths of each element in the batch of x, with shape (batch)
        Returns:
            Both the signal processed by the convolution and the resulting lengths
        """
        if self.use_mask:
            x = self.mask_fill(x, lengths)
        out = self.conv(x)
        return out, self.get_seq_len(lengths)
__init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=False, use_mask=True)

    Masked Convolution. This module corresponds to a 1d convolution with input masking. Arguments are the same as nn.Conv1d, with the addition of use_mask to control the masking behaviour.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| in_channels | int | Same as nn.Conv1d | required | 
| out_channels | int | Same as nn.Conv1d | required | 
| kernel_size | Union[int, Tuple[int]] | Same as nn.Conv1d | required | 
| stride | Union[int, Tuple[int]] | Same as nn.Conv1d | 1 | 
| padding | Union[int, Tuple[int]] | Same as nn.Conv1d | 0 | 
| dilation | Union[int, Tuple[int]] | Same as nn.Conv1d | 1 | 
| groups | int | Same as nn.Conv1d | 1 | 
| bias | bool | Same as nn.Conv1d | False | 
| use_mask | bool | Controls the masking of input before the convolution during the forward. | True | 
Source code in thunder/quartznet/blocks.py
def __init__(
    self,
    in_channels: int,
    out_channels: int,
    kernel_size: _size_1_t,
    stride: _size_1_t = 1,
    padding: _size_1_t = 0,
    dilation: _size_1_t = 1,
    groups: int = 1,
    bias: bool = False,
    use_mask: bool = True,
):
    """Masked Convolution.
    This module correspond to a 1d convolution with input masking. Arguments to create are the
    same as nn.Conv1d, but with the addition of use_mask for special behaviour.
    Args:
        in_channels: Same as nn.Conv1d
        out_channels: Same as nn.Conv1d
        kernel_size: Same as nn.Conv1d
        stride: Same as nn.Conv1d
        padding: Same as nn.Conv1d
        dilation: Same as nn.Conv1d
        groups: Same as nn.Conv1d
        bias: Same as nn.Conv1d
        use_mask: Controls the masking of input before the convolution during the forward.
    """
    super().__init__()
    self.use_mask = use_mask
    self.conv = nn.Conv1d(
        in_channels,
        out_channels,
        kernel_size,
        stride=stride,
        padding=padding,
        dilation=dilation,
        groups=groups,
        bias=bias,
    )
    self.padding = self.conv.padding[0]
    self.dilation = self.conv.dilation[0]
    self.kernel_size = self.conv.kernel_size[0]
    self.stride = self.conv.stride[0]
forward(self, x, lengths)
    Forward method
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| x | Tensor | Signal to be processed, of shape (batch, features, time) | required | 
| lengths | Tensor | Lengths of each element in the batch of x, with shape (batch) | required | 
Returns:
| Type | Description | 
|---|---|
| Tuple[torch.Tensor, torch.Tensor] | Both the signal processed by the convolution and the resulting lengths | 
Source code in thunder/quartznet/blocks.py
def forward(
    self, x: torch.Tensor, lengths: torch.Tensor
) -> Tuple[torch.Tensor, torch.Tensor]:
    """Forward method
    Args:
        x: Signal to be processed, of shape (batch, features, time)
        lengths: Lengths of each element in the batch of x, with shape (batch)
    Returns:
        Both the signal processed by the convolution and the resulting lengths
    """
    if self.use_mask:
        x = self.mask_fill(x, lengths)
    out = self.conv(x)
    return out, self.get_seq_len(lengths)
get_seq_len(self, lengths)
    Get the lengths of the inputs after the convolution operation is applied.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| lengths | Tensor | Original lengths of the inputs | required | 
Returns:
| Type | Description | 
|---|---|
| Tensor | Resulting lengths after the convolution | 
Source code in thunder/quartznet/blocks.py
def get_seq_len(self, lengths: torch.Tensor) -> torch.Tensor:
    """Get the lengths of the inputs after the convolution operation is applied.
    Args:
        lengths: Original lengths of the inputs
    Returns:
        Resulting lengths after the convolution
    """
    return (
        torch.div(
            lengths + 2 * self.padding - self.dilation * (self.kernel_size - 1) - 1,
            self.stride,
            rounding_mode="floor",
        )
        + 1
    )
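In other words, get_seq_len is the standard Conv1d output-length formula applied to the stored hyperparameters:

$$
L_{\text{out}} = \left\lfloor \frac{L_{\text{in}} + 2 \cdot \text{padding} - \text{dilation} \cdot (\text{kernel\_size} - 1) - 1}{\text{stride}} \right\rfloor + 1
$$

For example, with kernel_size=33, stride=2, dilation=1 and padding=16, an input of length 400 gives floor((400 + 32 - 32 - 1) / 2) + 1 = 200.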
mask_fill(self, x, lengths)
    Mask the input based on its respective lengths.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| x | Tensor | Signal to be processed, of shape (batch, features, time) | required | 
| lengths | Tensor | Lengths of each element in the batch of x, with shape (batch) | required | 
Returns:
| Type | Description | 
|---|---|
| Tensor | The masked signal | 
Source code in thunder/quartznet/blocks.py
def mask_fill(self, x: torch.Tensor, lengths: torch.Tensor) -> torch.Tensor:
    """Mask the input based on its respective lengths.
    Args:
        x: Signal to be processed, of shape (batch, features, time)
        lengths: Lengths of each element in the batch of x, with shape (batch)
    Returns:
        The masked signal
    """
    mask = lengths_to_mask(lengths, x.shape[-1])
    return x.masked_fill(~mask.unsqueeze(1), 0)
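A minimal usage sketch of the masking behaviour (tensor shapes chosen arbitrarily for the example):

```python
import torch
from thunder.quartznet.blocks import MaskedConv1d

# Batch of 2 signals with 64 features and up to 100 time steps;
# the second signal is only 70 steps long.
conv = MaskedConv1d(in_channels=64, out_channels=128, kernel_size=11, padding=5)
x = torch.randn(2, 64, 100)
lengths = torch.tensor([100, 70])

# Padding beyond each element's length is zeroed before the convolution runs.
out, out_lengths = conv(x, lengths)
print(out.shape)    # torch.Size([2, 128, 100])
print(out_lengths)  # tensor([100, 70]) -- stride 1 with "same" padding keeps lengths
```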
        
QuartznetBlock (Module)

Source code in thunder/quartznet/blocks.py

class QuartznetBlock(nn.Module):
    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        repeat: int = 5,
        kernel_size: _size_1_t = (11,),
        stride: _size_1_t = (1,),
        dilation: _size_1_t = (1,),
        dropout: float = 0.0,
        residual: bool = True,
        separable: bool = False,
    ):
        """Quartznet block. This is a refactoring of the Jasperblock present on the NeMo toolkit,
        but simplified to only support the new quartznet model. Biggest change is that
        dense residual used on Jasper is not supported here, and masked convolutions were also removed.
        Args:
            in_channels: Number of input channels
            out_channels: Number of output channels
            repeat: Repetitions inside block.
            kernel_size: Kernel size.
            stride: Stride of each repetition.
            dilation: Dilation of each repetition.
            dropout: Dropout used before each activation.
            residual: Controls the use of residual connection.
            separable: Controls the use of separable convolutions.
        """
        super().__init__()
        padding_val = get_same_padding(kernel_size[0], stride[0], dilation[0])
        inplanes_loop = in_channels
        conv = []
        for _ in range(repeat - 1):
            conv.extend(
                _get_conv_bn_layer(
                    inplanes_loop,
                    out_channels,
                    kernel_size=kernel_size,
                    stride=stride,
                    dilation=dilation,
                    padding=padding_val,
                    separable=separable,
                    bias=False,
                )
            )
            conv.extend(_get_act_dropout_layer(drop_prob=dropout))
            inplanes_loop = out_channels
        conv.extend(
            _get_conv_bn_layer(
                inplanes_loop,
                out_channels,
                kernel_size=kernel_size,
                stride=stride,
                dilation=dilation,
                padding=padding_val,
                separable=separable,
                bias=False,
            )
        )
        self.mconv = MultiSequential(*conv)
        if residual:
            stride_residual = stride if stride[0] == 1 else stride[0] ** repeat
            self.res = MultiSequential(
                *_get_conv_bn_layer(
                    in_channels,
                    out_channels,
                    kernel_size=1,
                    stride=stride_residual,
                    bias=False,
                )
            )
        else:
            self.res = None
        self.mout = MultiSequential(*_get_act_dropout_layer(drop_prob=dropout))
    def forward(
        self, x: torch.Tensor, lengths: torch.Tensor
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Args:
            x: Tensor of shape (batch, features, time) where #features == inplanes
        Returns:
            Result of applying the block on the input, and corresponding output lengths
        """
        # compute forward convolutions
        out, lengths_out = self.mconv(x, lengths)
        # compute the residuals
        if self.res is not None:
            res_out, _ = self.res(x, lengths)
            out = out + res_out
        # compute the output
        out, lengths_out = self.mout(out, lengths_out)
        return out, lengths_out
__init__(self, in_channels, out_channels, repeat=5, kernel_size=(11,), stride=(1,), dilation=(1,), dropout=0.0, residual=True, separable=False)

    Quartznet block. This is a refactoring of the JasperBlock present in the NeMo toolkit, but simplified to only support the new quartznet model. The biggest change is that the dense residual used in Jasper is not supported here, and masked convolutions were also removed.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| in_channels | int | Number of input channels | required | 
| out_channels | int | Number of output channels | required | 
| repeat | int | Repetitions inside block. | 5 | 
| kernel_size | Union[int, Tuple[int]] | Kernel size. | (11,) | 
| stride | Union[int, Tuple[int]] | Stride of each repetition. | (1,) | 
| dilation | Union[int, Tuple[int]] | Dilation of each repetition. | (1,) | 
| dropout | float | Dropout used before each activation. | 0.0 | 
| residual | bool | Controls the use of residual connection. | True | 
| separable | bool | Controls the use of separable convolutions. | False | 
Source code in thunder/quartznet/blocks.py
def __init__(
    self,
    in_channels: int,
    out_channels: int,
    repeat: int = 5,
    kernel_size: _size_1_t = (11,),
    stride: _size_1_t = (1,),
    dilation: _size_1_t = (1,),
    dropout: float = 0.0,
    residual: bool = True,
    separable: bool = False,
):
    """Quartznet block. This is a refactoring of the Jasperblock present on the NeMo toolkit,
    but simplified to only support the new quartznet model. Biggest change is that
    dense residual used on Jasper is not supported here, and masked convolutions were also removed.
    Args:
        in_channels: Number of input channels
        out_channels: Number of output channels
        repeat: Repetitions inside block.
        kernel_size: Kernel size.
        stride: Stride of each repetition.
        dilation: Dilation of each repetition.
        dropout: Dropout used before each activation.
        residual: Controls the use of residual connection.
        separable: Controls the use of separable convolutions.
    """
    super().__init__()
    padding_val = get_same_padding(kernel_size[0], stride[0], dilation[0])
    inplanes_loop = in_channels
    conv = []
    for _ in range(repeat - 1):
        conv.extend(
            _get_conv_bn_layer(
                inplanes_loop,
                out_channels,
                kernel_size=kernel_size,
                stride=stride,
                dilation=dilation,
                padding=padding_val,
                separable=separable,
                bias=False,
            )
        )
        conv.extend(_get_act_dropout_layer(drop_prob=dropout))
        inplanes_loop = out_channels
    conv.extend(
        _get_conv_bn_layer(
            inplanes_loop,
            out_channels,
            kernel_size=kernel_size,
            stride=stride,
            dilation=dilation,
            padding=padding_val,
            separable=separable,
            bias=False,
        )
    )
    self.mconv = MultiSequential(*conv)
    if residual:
        stride_residual = stride if stride[0] == 1 else stride[0] ** repeat
        self.res = MultiSequential(
            *_get_conv_bn_layer(
                in_channels,
                out_channels,
                kernel_size=1,
                stride=stride_residual,
                bias=False,
            )
        )
    else:
        self.res = None
    self.mout = MultiSequential(*_get_act_dropout_layer(drop_prob=dropout))
forward(self, x, lengths)

Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| x | Tensor | Tensor of shape (batch, features, time) where #features == inplanes | required | 
| lengths | Tensor | Lengths of each element in the batch of x, with shape (batch) | required | 
Returns:
| Type | Description | 
|---|---|
| Tuple[torch.Tensor, torch.Tensor] | Result of applying the block on the input, and corresponding output lengths | 
Source code in thunder/quartznet/blocks.py
def forward(
    self, x: torch.Tensor, lengths: torch.Tensor
) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    Args:
        x: Tensor of shape (batch, features, time) where #features == inplanes
    Returns:
        Result of applying the block on the input, and corresponding output lengths
    """
    # compute forward convolutions
    out, lengths_out = self.mconv(x, lengths)
    # compute the residuals
    if self.res is not None:
        res_out, _ = self.res(x, lengths)
        out = out + res_out
    # compute the output
    out, lengths_out = self.mout(out, lengths_out)
    return out, lengths_out
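A minimal usage sketch (shapes chosen arbitrarily; with the default stride of 1, the "same" padding preserves the time dimension):

```python
import torch
from thunder.quartznet.blocks import QuartznetBlock

# One separable block with 5 inner repetitions and a residual connection.
block = QuartznetBlock(in_channels=256, out_channels=256, kernel_size=(33,), separable=True)
x = torch.randn(4, 256, 200)
lengths = torch.full((4,), 200)

out, out_lengths = block(x, lengths)
print(out.shape)  # torch.Size([4, 256, 200])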
QuartznetEncoder(feat_in=64, filters=[256, 256, 512, 512, 512], kernel_sizes=[33, 39, 51, 63, 75], repeat_blocks=1, dropout=0.0)
    Basic Quartznet encoder setup. Can be used to build either Quartznet5x5 (repeat_blocks=1) or Quartznet15x5 (repeat_blocks=3)
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| feat_in | int | Number of input features to the model. | 64 | 
| filters | List[int] | List of filter sizes used to create the encoder blocks. | [256, 256, 512, 512, 512] | 
| kernel_sizes | List[int] | List of kernel sizes corresponding to each filter size. | [33, 39, 51, 63, 75] | 
| repeat_blocks | int | Number of repetitions of each block. | 1 | 
| dropout | float | Dropout used before each activation inside the blocks. | 0.0 | 
Returns:
| Type | Description | 
|---|---|
| Module | Pytorch model corresponding to the encoder. | 
Source code in thunder/quartznet/blocks.py
def QuartznetEncoder(
    feat_in: int = 64,
    filters: List[int] = [256, 256, 512, 512, 512],
    kernel_sizes: List[int] = [33, 39, 51, 63, 75],
    repeat_blocks: int = 1,
    dropout: float = 0.0,
) -> nn.Module:
    """Basic Quartznet encoder setup.
    Can be used to build either Quartznet5x5 (repeat_blocks=1) or Quartznet15x5 (repeat_blocks=3)
    Args:
        feat_in: Number of input features to the model.
        filters: List of filter sizes used to create the encoder blocks.
        kernel_sizes: List of kernel sizes corresponding to each filter size.
        repeat_blocks: Number of repetitions of each block.
        dropout: Dropout used before each activation inside the blocks.
    Returns:
        Pytorch model corresponding to the encoder.
    """
    return MultiSequential(
        stem(feat_in),
        *body(filters, kernel_sizes, repeat_blocks, dropout),
    )
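A minimal end-to-end sketch; the printed shapes assume the default configuration, in which only the stem has stride 2:

```python
import torch
from thunder.quartznet.blocks import QuartznetEncoder

# Quartznet5x5-style encoder over 64 mel features.
encoder = QuartznetEncoder(feat_in=64)
x = torch.randn(2, 64, 400)        # (batch, features, time)
lengths = torch.tensor([400, 350])

out, out_lengths = encoder(x, lengths)
print(out.shape)    # expected: torch.Size([2, 1024, 200]) -- stem stride 2 halves time
print(out_lengths)  # expected: tensor([200, 175])
```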
body(filters, kernel_size, repeat_blocks=1, dropout=0.0)
    Creates the body of the Quartznet model, that is, the middle part of the network.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| filters | List[int] | List of filters inside each block in the body. | required | 
| kernel_size | List[int] | Corresponding list of kernel sizes for each block. Should have the same length as the first argument. | required | 
| repeat_blocks | int | Number of repetitions of each block inside the body. | 1 | 
| dropout | float | Dropout used before each activation inside the blocks. | 0.0 | 
Returns:
| Type | Description | 
|---|---|
| List[thunder.quartznet.blocks.QuartznetBlock] | List of layers that form the body of the network. | 
Source code in thunder/quartznet/blocks.py
def body(
    filters: List[int],
    kernel_size: List[int],
    repeat_blocks: int = 1,
    dropout: float = 0.0,
) -> List[QuartznetBlock]:
    """Creates the body of the Quartznet model. That is the middle part.
    Args:
        filters: List of filters inside each block in the body.
        kernel_size: Corresponding list of kernel sizes for each block. Should have the same length as the first argument.
        repeat_blocks: Number of repetitions of each block inside the body.
    Returns:
        List of layers that form the body of the network.
    """
    layers = []
    f_in = 256
    for f, k in zip(filters, kernel_size):
        for _ in range(repeat_blocks):
            layers.append(
                QuartznetBlock(
                    f_in, f, kernel_size=(k,), separable=True, dropout=dropout
                )
            )
            f_in = f
    layers.extend(
        [
            QuartznetBlock(
                f_in,
                512,
                repeat=1,
                dilation=(2,),
                kernel_size=(87,),
                residual=False,
                separable=True,
                dropout=dropout,
            ),
            QuartznetBlock(
                512,
                1024,
                repeat=1,
                kernel_size=(1,),
                residual=False,
                separable=False,
                dropout=dropout,
            ),
        ]
    )
    return layers
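A short sketch of how stem and body compose (assuming MultiSequential is importable from the same module, which is not shown on this page):

```python
from thunder.quartznet.blocks import body, stem, MultiSequential

layers = body([256, 256, 512, 512, 512], [33, 39, 51, 63, 75], repeat_blocks=1)
print(len(layers))  # 7: five main blocks plus the two final blocks

# stem + body is exactly how QuartznetEncoder assembles the full encoder.
encoder = MultiSequential(stem(64), *layers)
```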
init_weights(m, mode=InitMode.xavier_uniform)

    Initialize Linear, Conv1d or BatchNorm1d weights. There's no return value; the operation occurs in place.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| m | Module | The layer to be initialized | required | 
| mode | InitMode | Weight initialization mode. Only applicable to linear and conv layers. | InitMode.xavier_uniform | 
Exceptions:
| Type | Description | 
|---|---|
| ValueError | Raised when the initialization mode is not one of the possible options. | 
Source code in thunder/quartznet/blocks.py
def init_weights(m: nn.Module, mode: InitMode = InitMode.xavier_uniform):
    """Initialize Linear, Conv1d or BatchNorm1d weights.
    There's no return value; the operation occurs in place.
    Args:
        m: The layer to be initialized
        mode: Weight initialization mode. Only applicable to linear and conv layers.
    Raises:
        ValueError: Raised when the initialization mode is not one of the possible options.
    """
    if isinstance(m, MaskedConv1d):
        init_weights(m.conv, mode)
    if isinstance(m, (nn.Conv1d, nn.Linear)):
        if mode == InitMode.xavier_uniform:
            nn.init.xavier_uniform_(m.weight, gain=1.0)
        elif mode == InitMode.xavier_normal:
            nn.init.xavier_normal_(m.weight, gain=1.0)
        elif mode == InitMode.kaiming_uniform:
            nn.init.kaiming_uniform_(m.weight, nonlinearity="relu")
        elif mode == InitMode.kaiming_normal:
            nn.init.kaiming_normal_(m.weight, nonlinearity="relu")
        else:
            raise ValueError(f"Unknown Initialization mode: {mode}")
    elif isinstance(m, nn.BatchNorm1d):
        if m.track_running_stats:
            m.running_mean.zero_()
            m.running_var.fill_(1)
            m.num_batches_tracked.zero_()
        if m.affine:
            nn.init.ones_(m.weight)
            nn.init.zeros_(m.bias)
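Since init_weights operates on a single layer, it is typically applied to a whole model through nn.Module.apply; a minimal sketch:

```python
from functools import partial
from thunder.quartznet.blocks import QuartznetEncoder, InitMode, init_weights

encoder = QuartznetEncoder(feat_in=64)

# Apply the default xavier_uniform init to every layer in the encoder.
encoder.apply(init_weights)

# Or pick another mode from the InitMode enum.
encoder.apply(partial(init_weights, mode=InitMode.kaiming_normal))
```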
stem(feat_in)

    Creates the Quartznet stem, that is, the first block of the model, which processes the input directly.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| feat_in | int | Number of input features | required | 
Returns:
| Type | Description | 
|---|---|
| QuartznetBlock | Quartznet stem block | 
Source code in thunder/quartznet/blocks.py
def stem(feat_in: int) -> QuartznetBlock:
    """Creates the Quartznet stem, that is, the first block of the model, which processes the input directly.
    Args:
        feat_in: Number of input features
    Returns:
        Quartznet stem block
    """
    return QuartznetBlock(
        feat_in,
        256,
        repeat=1,
        stride=(2,),
        kernel_size=(33,),
        residual=False,
        separable=True,
    )