wave_compare.py 1.5 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546
  1. from datetime import datetime
  2. import os
  3. os.environ["NUMBA_DISABLE_FUNCTION_CACHING"] = "1"
  4. import librosa
  5. # from matplotlib import pyplot
  6. # from librosa.feature import mfcc
  7. import numpy as np
  8. from scipy.spatial.distance import euclidean
  9. def extract_mfcc(filename, num_mfcc=13):
  10. # 取低频维度上的部分值输出,语音能量大多集中在低频域,数值一般取13。
  11. print("start extract_mfcc time:" + filename + str(datetime.now()))
  12. audio, sample_rate = librosa.load(filename)
  13. # pyplot.figure(figsize=(14, 5))
  14. # librosa.display.waveshow(y=audio, sr=sample_rate)
  15. # pyplot.title('Wave Form of ' + filename)
  16. # pyplot.show()
  17. # 使用librosa.feature.mfcc而不是直接导入mfcc
  18. mfcc_result = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=num_mfcc)
  19. mean_mfcc = np.mean(mfcc_result, axis=1)
  20. print("end extract_mfcc time:" + filename + str(datetime.now()))
  21. return mean_mfcc
  22. def compute_similarity(feature_1, feature_2):
  23. print("start compute_similarity time:" + str(datetime.now()))
  24. time_start = datetime.now()
  25. distance = euclidean(feature_1, feature_2)
  26. result = (100-distance)/100
  27. # 以下2种弃用的计算方式
  28. # result = 1-(distance*distance / 10000)
  29. # result = 1 - np.square((100-distance)/100)
  30. if result < 0:
  31. result = 0
  32. time_end = datetime.now()
  33. time_diff = (time_end - time_start).total_seconds()
  34. print("end compute_similarity time:" + str(datetime.now()))
  35. res={
  36. "result":result,
  37. "time_diff":time_diff
  38. }
  39. return res