Process and Save Spectrogram Images from Sound Files

spectrogram_images(
  trainingBasePath,
  outputBasePath,
  splits,
  random = "FALSE",
  windowlength = 512,
  minfreq.khz = 0.4,
  maxfreq.khz = 1.6,
  new.sampleratehz = 16000
)

Arguments

trainingBasePath

Base directory containing the training folders.

outputBasePath

Directory where the processed images will be saved.

splits

Numeric vector specifying the split ratios for the train, valid, and test sets, in that order, for example c(0.8, 0.1, 0.1). No default is shown in the usage above, so a value must be supplied.

random

Logical. If TRUE, files are sampled at random from each folder; if FALSE, files are divided into sets based on the alphabetical order of their file names. Defaults to "FALSE".

windowlength

Window length for input into the 'spectro' function from seewave. Defaults to 512.

minfreq.khz

Minimum frequency in kHz for the spectrogram. Defaults to 0.4.

maxfreq.khz

Maximum frequency in kHz for the spectrogram. Defaults to 1.6.

new.sampleratehz

New sample rate in Hz for resampling the audio. Defaults to 16000. Set to 'NA' if no resampling is required.

Value

The function saves spectrogram images into train, valid, and test folders inside the specified 'outputBasePath'.

  • Folders with spectrogram images: Depending on specified splits.

Examples

{
  # End-to-end example: cut a bundled recording into short clips, write them
  # to a temporary class folder, turn them into spectrogram images with
  # spectrogram_images(), then load the training images with torch and plot
  # them.

  # Load the 'TempBinWav' data
  data("TempBinWav")

  # Define the output directory for saving .wav files; the last path
  # component ("Noise") is the class folder the clips are written into
  output.dir <- file.path(tempdir(), "MultiDir", "Soundfiles", "Noise")

  # Create the output directory if it doesn't already exist
  dir.create(output.dir, recursive = TRUE, showWarnings = FALSE)

  # Define the cut points for the wave (from 1 to 30 with a step of 5);
  # consecutive cut points delimit one clip each
  cutwave.list <- seq(1, 30, 5)

  # Extract subsamples from the waveform.
  # seq_len() is used instead of 1:(n - 1) so that a single cut point yields
  # zero clips rather than the descending sequence c(1, 0).
  subsamps <- lapply(seq_len(length(cutwave.list) - 1), function(i) {
    extractWave(
      TempBinWav,
      from = cutwave.list[i],
      to = cutwave.list[i + 1],
      xunit = "time",
      plot = FALSE,
      output = "Wave"
    )
  })

  # Write the extracted subsamples to .wav files.
  # This is done purely for its side effect, so a plain loop is used.
  for (i in seq_along(subsamps)) {
    writeWave(
      subsamps[[i]],
      filename = file.path(output.dir, paste0("temp_", i, "_", ".wav")),
      extensible = FALSE
    )
  }

  # List all the files in the output directory
  list.files(output.dir)

  # Directory that will receive the spectrogram images
  spectro.dir <- file.path(tempdir(), "MultiDir", "Spectro")

  # Generate spectrogram images for sound files
  # (new.sampleratehz = "NA" requests no resampling)
  spectrogram_images(
    trainingBasePath = file.path(tempdir(), "MultiDir", "Soundfiles"),
    outputBasePath = spectro.dir,
    splits = c(0.5, 0.5, 0.0),
    new.sampleratehz = "NA"
  )

  # List all the images generated by the spectrogram process
  ListImages <- list.files(spectro.dir, recursive = TRUE)
  print(ListImages)

  # Get the path of a single spectrogram image
  Singlepath <- list.files(
    spectro.dir,
    recursive = TRUE,
    full.names = TRUE
  )[1]

  # Set input data path for the training images
  input.data.path <- file.path(spectro.dir, "train")

  # Load images and apply transformations
  train_ds <- image_folder_dataset(
    input.data.path,
    transform = . %>%
      torchvision::transform_to_tensor() %>%
      torchvision::transform_resize(size = c(224, 224)) %>%
      torchvision::transform_normalize(
        mean = c(0.485, 0.456, 0.406),
        std = c(0.229, 0.224, 0.225)
      ),
    target_transform = function(x) as.double(x) - 1
  )

  # Create a dataloader whose single batch holds the whole training set
  train_dl <- torch::dataloader(
    train_ds,
    batch_size = train_ds$.length(),
    shuffle = FALSE,
    drop_last = TRUE
  )

  # Extract the next batch from the dataloader
  batch <- train_dl$.iter()$.next()

  # Extract the labels for the batch
  classes <- batch[[2]]

  # Derive one class name per image from the first folder component of each
  # relative file path under the training directory
  class_names <- list.files(input.data.path, recursive = TRUE)
  class_names <- stringr::str_split_fixed(class_names, pattern = "/", n = 2)[, 1]

  # Convert batch tensor of images to an array and reorder dimensions
  images <- torch::as_array(batch[[1]]) %>% aperm(perm = c(1, 3, 4, 2))

  # Define a function to rescale pixel values to the [0, 1] range
  normalize_pixel_values <- function(image) {
    (image - min(image)) / (max(image) - min(image))
  }

  # Set plotting parameters, remembering the previous ones so the example
  # does not leave the user's graphics device reconfigured
  old.par <- par(mfcol = c(3, 4), mar = rep(1, 4))

  # Convert each image to a raster after normalizing, named by its class
  images <- images %>%
    purrr::array_tree(1) %>%
    purrr::set_names(class_names) %>%
    purrr::map(~ as.raster(normalize_pixel_values(.x)))

  # Display the images, with the class name as the title of each panel
  purrr::iwalk(images, ~ {
    plot(.x)
    title(.y)
  })

  # Restore the original plotting parameters
  par(old.par)
}
#> Creating spectrogram images
#> [1] "train/Noise/temp_1_.jpg" "train/Noise/temp_2_.jpg"
#> [3] "valid/Noise/temp_3_.jpg" "valid/Noise/temp_4_.jpg"
#> [5] "valid/Noise/temp_5_.jpg"