import os
import pathlib
import string

import numpy as np
import tensorflow as tf
# Use seed 66 for reproducible experiments
seed = 66
tf.random.set_seed(seed)
np.random.seed(seed)
# Select which subset of the AVICAR data to use
choices = ['digits', 'letters', 'mixed']
dataset_type = choices[0]
data_dir = pathlib.Path('data/avicar_some' + dataset_type)  # e.g. data/avicar_somedigits
if dataset_type == 'digits':
    label_strings = np.array([str(num) for num in range(10)])
elif dataset_type == 'letters':
    label_strings = np.array(list(string.ascii_uppercase))
else:
    label_strings = np.array([str(num) for num in range(10)] + list(string.ascii_uppercase))
print(dataset_type + ":", label_strings)
# Gather every WAV path under the dataset directory
filenames = tf.io.gfile.glob(str(data_dir) + "*/*.wav")
print("Example filename:", filenames[99])
print()
if dataset_type == "digits":
# Filter out non-digit files
filenames = [fname for fname in filenames if fname[32].isdigit()]
# Count # of examples for each label
for i in range(len(label_strings)):
num_examples = len([fname for fname in filenames if fname[32]==label_strings[i]])
print(f"""# examples for "{label_strings[i]}": {num_examples}""")
elif dataset_type == 'letters':
# Filter out non-letter files
filenames = [fname for fname in filenames if not(fname[32].isdigit())]
# Count # of examples for each label
for i in range(len(label_strings)):
num_examples = len([fname for fname in filenames if fname[33]==label_strings[i].upper()])
print(f"""# examples for "{label_strings[i]}": {num_examples}""")
num_samples = len(filenames)
print('# total examples:', num_samples)
print()
filenames = tf.random.shuffle(filenames)
TRAIN_PORTION = 0.7
TEST_PORTION = 0.3  # implied as 1 - TRAIN_PORTION; documented here, not used directly
train_end = int(num_samples*TRAIN_PORTION)
train_files = filenames[:train_end]
test_files = filenames[train_end:]
print('Training set size:', len(train_files))
print('Test set size:', len(test_files))
from python_speech_features import mfcc
import scipy.io.wavfile as wav

def get_mfcc_dict(filenames):
    """Map each WAV path to its MFCC feature matrix (num_frames x 13)."""
    mfccs = {}
    for wave in filenames:
        wave = wave.numpy().decode('utf-8')  # tf.string tensor -> Python str
        (rate, sig) = wav.read(wave)
        # nfft=2000 keeps the FFT at least as long as each analysis frame
        mfccs[wave] = mfcc(sig, rate, nfft=2000)
    return mfccs
mfccs_train = get_mfcc_dict(train_files)
mfccs_test = get_mfcc_dict(test_files)
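# Optional sanity check: each value should be a (num_frames, 13) array, since
# python_speech_features' mfcc defaults to numcep=13
sample_key = next(iter(mfccs_train))
print('MFCC matrix shape for one training file:', mfccs_train[sample_key].shape)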
def get_label(file_path):
    # The label is the single character at offset 9 of the file's basename
    parts = tf.strings.split(file_path, os.path.sep)
    label = tf.strings.substr(parts[-1], pos=9, len=1)
    return label.numpy().decode('utf-8')
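# Quick sanity check: confirm the hard-coded offset really lands on the label
# character for one real file before trusting the extracted labels
sample_path = train_files[0].numpy().decode('utf-8')
print('Label for', sample_path, '->', get_label(sample_path))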
def preprocess_mfcc(mfccs):
    # Collapse each variable-length MFCC matrix to a fixed-length vector by
    # averaging over time (frames), giving 13 features per clip
    X = []
    y = []
    for filename in mfccs:
        X.append(np.mean(mfccs[filename], axis=0))
        y.append(get_label(filename))
    return np.array(X), np.array(y)
training_data, training_label = preprocess_mfcc(mfccs_train)
testing_data, testing_label = preprocess_mfcc(mfccs_test)
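# Hedged aside: SGDClassifier, MLPClassifier, and KNeighborsClassifier below
# are sensitive to feature scale, and per-coefficient MFCC means are not on a
# common scale. A minimal standardization sketch, fit on the training split
# only to avoid leakage; the *_scaled names are illustrative, swap them into
# the fit/cross_val_score calls below to try it:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
training_data_scaled = scaler.fit_transform(training_data)
testing_data_scaled = scaler.transform(testing_data)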
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import SGDClassifier

# Linear classifier trained with stochastic gradient descent.
# cross_val_score clones and refits the model internally; the explicit
# fit() keeps a trained copy around for reuse.
model = SGDClassifier(max_iter=10000)
model.fit(training_data, training_label)
scores = cross_val_score(model, training_data, training_label, cv=10, scoring='accuracy')
print('SGDClassifier 10-fold CV accuracy:', scores.mean())
from sklearn.naive_bayes import GaussianNB

model = GaussianNB()
model.fit(training_data, training_label)
scores = cross_val_score(model, training_data, training_label, cv=10, scoring='accuracy')
print('GaussianNB 10-fold CV accuracy:', scores.mean())
from sklearn.neural_network import MLPClassifier

model = MLPClassifier(max_iter=10000)
model.fit(training_data, training_label)
scores = cross_val_score(model, training_data, training_label, cv=10, scoring='accuracy')
print('MLPClassifier 10-fold CV accuracy:', scores.mean())
from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier()
model.fit(training_data, training_label)
scores = cross_val_score(model, training_data, training_label, cv=10, scoring='accuracy')
print('RandomForestClassifier 10-fold CV accuracy:', scores.mean())
from sklearn.neighbors import KNeighborsClassifier

model = KNeighborsClassifier()
model.fit(training_data, training_label)
scores = cross_val_score(model, training_data, training_label, cv=10, scoring='accuracy')
print('KNeighborsClassifier 10-fold CV accuracy:', scores.mean())
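# The held-out test split prepared above is never consumed by the
# cross-validation runs. A minimal sketch of a final check (here against the
# last-fitted model, the KNN; in practice, refit whichever model
# cross-validated best before scoring the test set):
from sklearn.metrics import accuracy_score

test_predictions = model.predict(testing_data)
print('Held-out test accuracy:', accuracy_score(testing_label, test_predictions))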