Averaging over 5 folds
Browse files
- __pycache__/app.cpython-311.pyc +0 -0
- app.py +13 -5
- example.jpg +0 -0
__pycache__/app.cpython-311.pyc
CHANGED
|
Binary files a/__pycache__/app.cpython-311.pyc and b/__pycache__/app.cpython-311.pyc differ
|
|
|
app.py
CHANGED
|
@@ -25,7 +25,7 @@ from transformers import ASTFeatureExtractor
|
|
| 25 |
FEATURE_EXTRACTOR = ASTFeatureExtractor()
|
| 26 |
|
| 27 |
def plot_mel(sr, x):
|
| 28 |
-
mel_spec = librosa.feature.melspectrogram(y=x, sr=sr, n_mels=
|
| 29 |
mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)
|
| 30 |
mel_spec_db = (mel_spec_db - mel_spec_db.min()) / (mel_spec_db.max() - mel_spec_db.min()) # normalize spectrogram to [0,1]
|
| 31 |
mel_spec_db = np.stack([mel_spec_db, mel_spec_db, mel_spec_db], axis=-1) # Convert to 3-channel
|
|
@@ -54,7 +54,9 @@ def plot_wave(sr, x):
|
|
| 54 |
|
| 55 |
def predict(audio, start, end):
|
| 56 |
sr, x = audio
|
| 57 |
-
|
|
|
|
|
|
|
| 58 |
res = preprocess_for_inference(x, sr)
|
| 59 |
|
| 60 |
if start >= end:
|
|
@@ -102,8 +104,10 @@ label_mapping = pd.read_csv('BirdAST_Baseline_5folds_label_map.csv')
|
|
| 102 |
species_id_to_name = {row['species_id']: row['scientific_name'] for index, row in label_mapping.iterrows()}
|
| 103 |
|
| 104 |
def preprocess_for_inference(audio_arr, sr):
|
|
|
|
| 105 |
spec = FEATURE_EXTRACTOR(audio_arr, sampling_rate=sr, padding="max_length", return_tensors="pt")
|
| 106 |
input_values = spec['input_values'] # Get the input values prepared for model input
|
|
|
|
| 107 |
|
| 108 |
# Initialize a list to store predictions from all models
|
| 109 |
model_outputs = []
|
|
@@ -114,16 +118,20 @@ def preprocess_for_inference(audio_arr, sr):
|
|
| 114 |
output = model(input_values)
|
| 115 |
predict_score = F.softmax(output['logits'], dim=1)
|
| 116 |
model_outputs.append(predict_score)
|
|
|
|
|
|
|
| 117 |
|
| 118 |
# Average the predictions across all models
|
| 119 |
-
avg_predictions = torch.mean(torch.
|
|
|
|
| 120 |
|
| 121 |
# Get the top 10 predictions based on the average prediction scores
|
| 122 |
-
topk_values, topk_indices = torch.topk(avg_predictions, 10
|
|
|
|
| 123 |
|
| 124 |
# Initialize results list to store the species names and their associated probabilities
|
| 125 |
results = []
|
| 126 |
-
for idx, scores in zip(topk_indices
|
| 127 |
species_name = species_id_to_name[idx.item()]
|
| 128 |
probability = scores.item()
|
| 129 |
results.append([species_name, probability])
|
|
|
|
| 25 |
FEATURE_EXTRACTOR = ASTFeatureExtractor()
|
| 26 |
|
| 27 |
def plot_mel(sr, x):
|
| 28 |
+
mel_spec = librosa.feature.melspectrogram(y=x, sr=sr, n_mels=128, fmax=10000)
|
| 29 |
mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)
|
| 30 |
mel_spec_db = (mel_spec_db - mel_spec_db.min()) / (mel_spec_db.max() - mel_spec_db.min()) # normalize spectrogram to [0,1]
|
| 31 |
mel_spec_db = np.stack([mel_spec_db, mel_spec_db, mel_spec_db], axis=-1) # Convert to 3-channel
|
|
|
|
| 54 |
|
| 55 |
def predict(audio, start, end):
|
| 56 |
sr, x = audio
|
| 57 |
+
|
| 58 |
+
x = np.array(x, dtype=np.float32)/32768.0
|
| 59 |
+
x = x[start*sr : end*sr]
|
| 60 |
res = preprocess_for_inference(x, sr)
|
| 61 |
|
| 62 |
if start >= end:
|
|
|
|
| 104 |
species_id_to_name = {row['species_id']: row['scientific_name'] for index, row in label_mapping.iterrows()}
|
| 105 |
|
| 106 |
def preprocess_for_inference(audio_arr, sr):
|
| 107 |
+
print(sr)
|
| 108 |
spec = FEATURE_EXTRACTOR(audio_arr, sampling_rate=sr, padding="max_length", return_tensors="pt")
|
| 109 |
input_values = spec['input_values'] # Get the input values prepared for model input
|
| 110 |
+
|
| 111 |
|
| 112 |
# Initialize a list to store predictions from all models
|
| 113 |
model_outputs = []
|
|
|
|
| 118 |
output = model(input_values)
|
| 119 |
predict_score = F.softmax(output['logits'], dim=1)
|
| 120 |
model_outputs.append(predict_score)
|
| 121 |
+
print(predict_score[0, 434])
|
| 122 |
+
|
| 123 |
|
| 124 |
# Average the predictions across all models
|
| 125 |
+
avg_predictions = torch.mean(torch.cat(model_outputs), dim=0) #.values
|
| 126 |
+
print(avg_predictions[434])
|
| 127 |
|
| 128 |
# Get the top 10 predictions based on the average prediction scores
|
| 129 |
+
topk_values, topk_indices = torch.topk(avg_predictions, 10)
|
| 130 |
+
print(topk_values.shape, topk_indices.shape)
|
| 131 |
|
| 132 |
# Initialize results list to store the species names and their associated probabilities
|
| 133 |
results = []
|
| 134 |
+
for idx, scores in zip(topk_indices, topk_values):
|
| 135 |
species_name = species_id_to_name[idx.item()]
|
| 136 |
probability = scores.item()
|
| 137 |
results.append([species_name, probability])
|
example.jpg
ADDED
|