12345678910111213141516171819202122232425262728293031323334353637383940414243444546 |
- from datetime import datetime
- import os
- os.environ["NUMBA_DISABLE_FUNCTION_CACHING"] = "1"
- import librosa
- # from matplotlib import pyplot
- # from librosa.feature import mfcc
- import numpy as np
- from scipy.spatial.distance import euclidean
def extract_mfcc(filename, num_mfcc=13):
    """Load an audio file and return its time-averaged MFCC vector.

    Args:
        filename: Path of the audio file, passed straight to ``librosa.load``.
            May be a ``str`` or a path-like object.
        num_mfcc: Number of MFCC coefficients to compute (``n_mfcc``).
            Only the lowest coefficients are kept because speech energy is
            mostly concentrated in the low-frequency range; 13 is the usual
            choice.

    Returns:
        The mean of the MFCC matrix along axis 1, i.e. one value per
        coefficient (assuming librosa's ``(n_mfcc, n_frames)`` layout --
        confirm against the installed librosa version).
    """
    # f-strings instead of ``+`` so that path-like ``filename`` values do not
    # raise TypeError while building the log line; output for ``str`` input
    # is unchanged.
    print(f"start extract_mfcc time:{filename}{datetime.now()}")

    # Loaded with librosa's default settings (sample rate is whatever
    # ``librosa.load`` returns when no ``sr`` is given).
    audio, sample_rate = librosa.load(filename)

    # For debugging, the waveform can be plotted with
    # ``librosa.display.waveshow(y=audio, sr=sample_rate)``.

    # Call librosa.feature.mfcc through the package rather than importing
    # ``mfcc`` directly.
    mfcc_result = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=num_mfcc)

    # Collapse the per-frame coefficients into a single fixed-length vector.
    mean_mfcc = np.mean(mfcc_result, axis=1)

    print(f"end extract_mfcc time:{filename}{datetime.now()}")
    return mean_mfcc
def compute_similarity(feature_1, feature_2):
    """Score how similar two feature vectors are on a 0..1 scale.

    The score is a linear mapping of the Euclidean distance ``d`` between
    the vectors: ``(100 - d) / 100``, clamped so it never drops below 0.
    Identical vectors therefore score 1.0 and any distance of 100 or more
    scores 0.0.

    Args:
        feature_1: First 1-D feature vector (anything accepted by
            ``scipy.spatial.distance.euclidean``).
        feature_2: Second 1-D feature vector of the same length.

    Returns:
        A dict with two keys:
            ``"result"``: the similarity score as a Python ``float``.
            ``"time_diff"``: seconds spent computing the score.
    """
    # Local import: ``time`` is not among the module-level imports.
    import time

    print("start compute_similarity time:" + str(datetime.now()))

    # perf_counter is monotonic, so the elapsed time cannot go negative or
    # jump when the wall clock is adjusted (DST, NTP), unlike a difference of
    # naive datetime.now() values.
    started = time.perf_counter()

    distance = euclidean(feature_1, feature_2)

    # Linear mapping; two quadratic variants were tried earlier and dropped.
    # float() keeps the return type consistent whether or not we clamp.
    result = float((100 - distance) / 100)
    if result < 0:
        result = 0.0

    time_diff = time.perf_counter() - started

    print("end compute_similarity time:" + str(datetime.now()))
    return {
        "result": result,
        "time_diff": time_diff,
    }
|