Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Regression Test Pipeline #120

Open
wants to merge 34 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 25 commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
daa68c8
Abhinay1997 Apr 20, 2024
e2e5632
Merge branch 'main' into wer_utils
Abhinay1997 Apr 22, 2024
6af9f5a
Add basic Fraction type to handle Number normalization
Abhinay1997 May 2, 2024
87c230a
Add EnglishNumberNormalizer
Abhinay1997 May 2, 2024
d8cda9f
Merge branch 'main' into wer_utils
Abhinay1997 May 2, 2024
b8c30fe
Adds Basic Fraction type for WER
Abhinay1997 May 4, 2024
06e66e4
Refactor + Add english normalizers
Abhinay1997 May 4, 2024
3334d44
Bug fixes in number normalization. regex, multiplier processing.
Abhinay1997 May 8, 2024
da3a719
wer evaluate function + string optimization
Abhinay1997 May 10, 2024
acb80ff
Add wer test on long audio
Abhinay1997 May 10, 2024
dbbf9bf
Remove Wagner-Fischer, fix normalization bugs.
Abhinay1997 May 28, 2024
16a5525
Hirschberg's LCS Algorithm for edit operations
Abhinay1997 May 28, 2024
70456b3
Remove warnings in Fraction implementation
Abhinay1997 May 28, 2024
a3c94cc
Add tests
Abhinay1997 May 28, 2024
b7e52fa
Merge branch 'main' into wer_utils
Abhinay1997 May 28, 2024
60f8956
Refactoring
Abhinay1997 May 29, 2024
89df136
Refactor regression tests
Abhinay1997 Jun 11, 2024
ad13284
Add WER to regression test results, fix overflow
Abhinay1997 Jun 11, 2024
47be844
clean up files
Abhinay1997 Jun 11, 2024
bf46309
Merge branch 'main' into wer_utils
Abhinay1997 Jun 11, 2024
6296506
patch overflow for now.
Abhinay1997 Jun 11, 2024
6a28fc1
Re-add file needed for tests
Abhinay1997 Jun 12, 2024
26bb7c6
Fix xcode test attachment
ZachNagengast Jul 28, 2024
01baf7b
Fix overflow when using Int.
Abhinay1997 Aug 2, 2024
cca6f50
Add flag to run only on first audio file of the dataset
Abhinay1997 Aug 2, 2024
3fceef3
Abhinay1997 Aug 6, 2024
ad4c7f5
PR Clenup:
Abhinay1997 Aug 6, 2024
74ad9be
Merge branch 'main' into wer_utils
ZachNagengast Aug 6, 2024
525657b
Adds system memory, disk space and battery level tracking.
Abhinay1997 Aug 12, 2024
83ffc3f
Remove sample JSON
Abhinay1997 Aug 12, 2024
a8d6e27
Merge branch 'main' into wer_utils
Abhinay1997 Aug 12, 2024
2f3be51
Fix compilation on non macOS
Abhinay1997 Aug 12, 2024
d9bc43b
Fix battery checks for watchOS
Abhinay1997 Aug 12, 2024
c99bd94
Fix imports
Abhinay1997 Aug 12, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"failureInfo":{},"device":"Apple M1\n","modelsTested":["tiny"]}

Large diffs are not rendered by default.

248 changes: 248 additions & 0 deletions Tests/WhisperKitTests/Evaluate/Fraction.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,248 @@
// Simple Fraction implementation for the normalization code.
// Doesn't do everything the python module fractions can do.
import Foundation

/// A minimal rational number backed by two `Int`s, used by the normalization code.
///
/// Values created through the initializers are stored in lowest terms with a
/// positive denominator. Parsing and arithmetic return `nil` instead of trapping
/// when an intermediate value does not fit in `Int` or a denominator would be zero.
struct Fraction {
    var numerator: Int
    var denominator: Int

    /// Textual form `numerator/denominator`.
    var description: String {
        "\(numerator)/\(denominator)"
    }

    /// Creates a fraction in lowest terms with a positive denominator.
    ///
    /// - Returns: `nil` when `denominator` is zero or either argument is `Int.min`.
    init?(numerator: Int, denominator: Int) {
        guard denominator != 0 else { return nil }
        // `Int.min` has no positive counterpart, so it cannot be negated safely.
        guard numerator > Int.min, denominator > Int.min else { return nil }

        let flipSign = denominator < 0
        self.numerator = flipSign ? -numerator : numerator
        self.denominator = flipSign ? -denominator : denominator
        simplify()
    }

    /// Creates the fraction that is exactly equal to `value`.
    ///
    /// - Returns: `nil` for NaN, for either infinity, and when the exact ratio
    ///   cannot be produced by `Double.toIntegerNumberRatio(value:)`.
    init?(_ value: Double) {
        // NaN never compares equal to anything (itself included), so `== .nan`
        // cannot detect it; `isFinite` rejects NaN and both infinities.
        guard value.isFinite else { return nil }
        if value == 0.0 {
            self.init(numerator: 0, denominator: 1)
            return
        }
        // Convert the magnitude and re-apply the sign here: `significand` is
        // always non-negative, so the sign must be carried separately.
        guard let (n, d) = Double.toIntegerNumberRatio(value: value.magnitude) else { return nil }
        // The ratio of a magnitude can never be negative or have a non-positive
        // denominator; treat such a result as a failed conversion.
        guard n >= 0, d > 0 else { return nil }
        self.init(numerator: value.sign == .minus ? -n : n, denominator: d)
    }

    /// Creates the fraction that is exactly equal to `value`.
    init?(_ value: Float) {
        self.init(Double(value))
    }

    /// Parses text such as `"3"`, `"-3/4"`, `".5"`, `"1_000"`, or `"1.5e-3"`.
    ///
    /// - Returns: `nil` when the text does not match the rational format, when a
    ///   component does not fit in `Int`, or when the denominator is zero.
    init?(_ value: String) {
        let rationalFormatPattern = """
        \\A\\s*
        (?<sign>[-+]?)?
        (?=\\d|\\.\\d)
        (?<num>\\d*|\\d+(_\\d+)*)?
        (?:\\.(?<decimal>\\d+(_\\d+)*))?
        (?:\\s*/\\s*(?<denom>\\d+(_\\d+)*))?
        (?:E(?<exp>[-+]?\\d+(_\\d+)*))?
        \\s*\\Z
        """

        guard let regex = try? NSRegularExpression(
            pattern: rationalFormatPattern,
            options: [.allowCommentsAndWhitespace, .caseInsensitive]
        ) else { return nil }
        let fullRange = NSRange(location: 0, length: value.utf16.count)
        guard let match = regex.firstMatch(in: value, options: [], range: fullRange) else { return nil }

        // Text of a named group with digit-group underscores removed, or `nil`
        // when the group did not take part in the match.
        func captured(_ name: String) -> String? {
            guard let groupRange = Range(match.range(withName: name), in: value) else { return nil }
            return String(value[groupRange]).replacingOccurrences(of: "_", with: "")
        }

        // The integer part may be empty (".5"); an empty capture means zero.
        var numerator = 0
        if let numText = captured("num"), !numText.isEmpty {
            guard let parsed = Int(numText) else { return nil } // does not fit in Int
            numerator = parsed
        }
        var denominator = 1

        if let denomText = captured("denom") {
            // NOTE(review): when a denominator is present, any decimal part or
            // exponent that also matched is ignored, as in the original code.
            guard let parsed = Int(denomText) else { return nil }
            denominator = parsed
        } else {
            if let decimal = captured("decimal") {
                // numerator = numerator * 10^len(decimal) + decimal
                guard let scale = Fraction.powerOfTen(UInt(decimal.count)),
                      let fractional = Int(decimal) else { return nil }
                let scaled = numerator.multipliedReportingOverflow(by: scale)
                guard !scaled.overflow else { return nil }
                let total = scaled.partialValue.addingReportingOverflow(fractional)
                guard !total.overflow else { return nil }
                numerator = total.partialValue
                denominator = scale
            }

            if let expText = captured("exp") {
                // `Int(_:)` accepts the optional leading sign of the exponent.
                guard let exponent = Int(expText),
                      let factor = Fraction.powerOfTen(exponent.magnitude) else { return nil }
                let scaled = exponent >= 0
                    ? numerator.multipliedReportingOverflow(by: factor)
                    : denominator.multipliedReportingOverflow(by: factor)
                guard !scaled.overflow else { return nil }
                if exponent >= 0 {
                    numerator = scaled.partialValue
                } else {
                    denominator = scaled.partialValue
                }
            }
        }

        if captured("sign") == "-" {
            numerator = -numerator
        }

        self.init(numerator: numerator, denominator: denominator)
    }

    /// Greatest common divisor by Euclid's algorithm.
    ///
    /// The result carries whatever sign the remainder sequence ends on, so it
    /// may be negative; callers that need a positive divisor take its magnitude.
    static func gcd(lhs: Int, rhs: Int) -> Int {
        var a = lhs
        var b = rhs
        while b != 0 { (a, b) = (b, a % b) }
        return a
    }

    /// Least common multiple; returns 0 when both arguments are 0.
    static func lcm(lhs: Int, rhs: Int) -> Int {
        let divisor = gcd(lhs: lhs, rhs: rhs)
        guard divisor != 0 else { return 0 }
        // Divide before multiplying to keep the intermediate value small.
        return lhs / divisor * rhs
    }

    /// Reduces the fraction to lowest terms in place.
    mutating func simplify() {
        let magnitude = Fraction.gcd(lhs: numerator, rhs: denominator).magnitude
        guard magnitude > 1, let divisor = Int(exactly: magnitude) else { return }
        numerator /= divisor
        denominator /= divisor
    }

    /// Sum of two fractions, or `nil` when the result does not fit in `Int`.
    static func +(lhs: Fraction, rhs: Fraction) -> Fraction? {
        return combine(lhs, rhs, subtracting: false)
    }

    /// Difference of two fractions, or `nil` when the result does not fit in `Int`.
    static func -(lhs: Fraction, rhs: Fraction) -> Fraction? {
        return combine(lhs, rhs, subtracting: true)
    }

    /// Product of two fractions, or `nil` when the result does not fit in `Int`.
    static func *(lhs: Fraction, rhs: Fraction) -> Fraction? {
        guard let n = checkedProduct(lhs.numerator, rhs.numerator),
              let d = checkedProduct(lhs.denominator, rhs.denominator) else { return nil }
        return Fraction(numerator: n, denominator: d)
    }

    /// Quotient of two fractions; `nil` when `rhs` is zero or the result does not fit in `Int`.
    static func /(lhs: Fraction, rhs: Fraction) -> Fraction? {
        guard let n = checkedProduct(lhs.numerator, rhs.denominator),
              let d = checkedProduct(lhs.denominator, rhs.numerator) else { return nil }
        return Fraction(numerator: n, denominator: d)
    }

    /// `a * b`, or `nil` when the product overflows `Int`.
    private static func checkedProduct(_ a: Int, _ b: Int) -> Int? {
        let product = a.multipliedReportingOverflow(by: b)
        return product.overflow ? nil : product.partialValue
    }

    /// `10^exponent`, or `nil` when it does not fit in `Int`.
    private static func powerOfTen(_ exponent: UInt) -> Int? {
        var result = 1
        for _ in 0..<exponent {
            let next = result.multipliedReportingOverflow(by: 10)
            if next.overflow { return nil }
            result = next.partialValue
        }
        return result
    }

    /// Shared implementation of `+` and `-`.
    ///
    /// Works over the reduced common denominator (dividing by the gcd of the
    /// two denominators first) so intermediates stay as small as possible, and
    /// reports overflow as `nil`.
    private static func combine(_ lhs: Fraction, _ rhs: Fraction, subtracting: Bool) -> Fraction? {
        let na = lhs.numerator
        let nb = rhs.numerator
        let da = lhs.denominator
        let db = rhs.denominator

        guard let g = Int(exactly: gcd(lhs: da, rhs: db).magnitude), g != 0 else { return nil }
        let s = da / g
        guard let left = checkedProduct(na, db / g),
              let right = checkedProduct(nb, s) else { return nil }
        let combined = subtracting
            ? left.subtractingReportingOverflow(right)
            : left.addingReportingOverflow(right)
        guard !combined.overflow else { return nil }
        let t = combined.partialValue

        // Any factor shared by the new numerator and `g` can be removed from
        // the denominator before the final multiplication.
        guard let g2 = Int(exactly: gcd(lhs: t, rhs: g).magnitude), g2 != 0,
              let newDenominator = checkedProduct(s, db / g2) else { return nil }
        return Fraction(numerator: t / g2, denominator: newDenominator)
    }

}

extension Fraction: Equatable {
    /// Two fractions are equal when both their numerators and their
    /// denominators are equal.
    static func == (lhs: Fraction, rhs: Fraction) -> Bool {
        return (lhs.numerator, lhs.denominator) == (rhs.numerator, rhs.denominator)
    }
}

// MARK: Fraction operations with Ints
/// Mixed `Int` / `Fraction` arithmetic.
///
/// Each operator promotes the `Int` operand to a `Fraction` with denominator 1
/// and forwards to the `Fraction`/`Fraction` operator. When the promotion fails
/// the result is `nil`; the other operand is never returned as if it were the
/// answer.
extension Fraction {
    static func +(lhs: Int, rhs: Fraction) -> Fraction? {
        guard let lhsFraction = Fraction(numerator: lhs, denominator: 1) else { return nil }
        return lhsFraction + rhs
    }

    static func +(lhs: Fraction, rhs: Int) -> Fraction? {
        guard let rhsFraction = Fraction(numerator: rhs, denominator: 1) else { return nil }
        return lhs + rhsFraction
    }

    static func -(lhs: Int, rhs: Fraction) -> Fraction? {
        guard let lhsFraction = Fraction(numerator: lhs, denominator: 1) else { return nil }
        return lhsFraction - rhs
    }

    static func -(lhs: Fraction, rhs: Int) -> Fraction? {
        guard let rhsFraction = Fraction(numerator: rhs, denominator: 1) else { return nil }
        return lhs - rhsFraction
    }

    static func *(lhs: Fraction, rhs: Int) -> Fraction? {
        guard let rhsFraction = Fraction(numerator: rhs, denominator: 1) else { return nil }
        return lhs * rhsFraction
    }

    static func *(lhs: Int, rhs: Fraction) -> Fraction? {
        guard let lhsFraction = Fraction(numerator: lhs, denominator: 1) else { return nil }
        return lhsFraction * rhs
    }

    static func /(lhs: Fraction, rhs: Int) -> Fraction? {
        guard let rhsFraction = Fraction(numerator: rhs, denominator: 1) else { return nil }
        return lhs / rhsFraction
    }

    static func /(lhs: Int, rhs: Fraction) -> Fraction? {
        guard let lhsFraction = Fraction(numerator: lhs, denominator: 1) else { return nil }
        return lhsFraction / rhs
    }
}

extension Double {
    /// Returns the integer pair `(numerator, denominator)` whose quotient is
    /// exactly `value`, with the denominator a power of two.
    ///
    /// - Parameter value: The number to convert.
    /// - Returns: The exact ratio (the numerator carries the sign), or `nil`
    ///   when `value` is NaN or infinite, or when the numerator or denominator
    ///   would not fit in `Int`.
    static func toIntegerNumberRatio(value: Double) -> (Int, Int)? {
        // NaN never compares equal to anything, so it must be detected with
        // `isFinite`/`isNaN` rather than `==`.
        guard value.isFinite else { return nil }
        // Zero has `exponent == Int.min`, which cannot be negated below.
        if value == 0 { return (0, 1) }

        // `significand` is the magnitude in 1.0 ..< 2.0; the sign is re-applied
        // at the end.
        var mantissa = value.significand
        var exponent = value.exponent

        // Double the mantissa until it is integral. A finite Double has at most
        // 52 fractional significand bits, so this terminates quickly.
        while mantissa != mantissa.rounded(.down) {
            mantissa *= 2.0
            exponent -= 1
        }

        guard let magnitude = Int(exactly: mantissa) else { return nil }
        var numerator = magnitude
        var denominator = 1

        if exponent > 0 {
            // Shifting must not push any set bit into or past the sign bit.
            guard exponent < magnitude.leadingZeroBitCount else { return nil }
            numerator = magnitude << exponent
        } else {
            let shift = -exponent
            guard shift < Int.bitWidth - 1 else { return nil }
            denominator = 1 << shift
        }

        if value.sign == .minus {
            numerator = -numerator
        }
        return (numerator, denominator)
    }
}



128 changes: 128 additions & 0 deletions Tests/WhisperKitTests/Evaluate/Hirschberg.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
import Foundation

/// Computes the last row of the edit-distance dynamic-programming matrix
/// between `s1Chars` and `s2Chars`.
///
/// Only two rows are kept at any time. Entry `j` of the result is the edit
/// distance between all of `s1Chars` and the first `j` scalars of `s2Chars`.
///
/// - Returns: An array of `s2Chars.count + 1` distances. When `s1Chars` is
///   empty this is `0...s2Chars.count`; when `s2Chars` is empty it is
///   `[s1Chars.count]`.
func computeLastRow(_ s1Chars: Array<Unicode.Scalar>, _ s2Chars: Array<Unicode.Scalar>) -> [Int] {
    var prevRow = Array(0...s2Chars.count)
    // Scratch row; every cell is overwritten before it is read in each pass.
    var currentRow = prevRow

    // Iterating the collections directly (instead of `1...count`) keeps the
    // function safe for empty inputs, where a closed range `1...0` would trap.
    for (i, c1) in s1Chars.enumerated() {
        currentRow[0] = i + 1

        for (j, c2) in s2Chars.enumerated() {
            let cost = c1 == c2 ? 0 : 1
            currentRow[j + 1] = min(
                prevRow[j + 1] + 1,   // Deletion
                currentRow[j] + 1,    // Insertion
                prevRow[j] + cost     // Substitution
            )
        }
        // Reuse the two buffers rather than allocating a new row per pass.
        swap(&prevRow, &currentRow)
    }

    return prevRow
}

/// Builds the full edit-distance matrix between `xArray` and `yArray` and
/// traces it back into the list of edit operations that turns `xArray` into
/// `yArray`.
///
/// Matching positions produce no operation; only replacements, insertions, and
/// deletions are reported, in left-to-right order. Empty inputs are supported:
/// an empty `xArray` yields only insertions and an empty `yArray` only deletions.
func needlemanWunsch(_ xArray: Array<Unicode.Scalar>, _ yArray: Array<Unicode.Scalar>) -> [EditOp] {
    let m = xArray.count
    let n = yArray.count

    var dp = [[Int]](repeating: [Int](repeating: 0, count: n + 1), count: m + 1)
    // Start the border ranges at 0 so they are valid even when m or n is 0
    // (`1...0` would trap).
    for i in 0...m {
        dp[i][0] = i
    }
    for j in 0...n {
        dp[0][j] = j
    }

    if m > 0 && n > 0 {
        for i in 1...m {
            for j in 1...n {
                let cost = xArray[i - 1] == yArray[j - 1] ? 0 : 1
                dp[i][j] = min(
                    dp[i - 1][j] + 1,       // Deletion
                    dp[i][j - 1] + 1,       // Insertion
                    dp[i - 1][j - 1] + cost // Substitution
                )
            }
        }
    }

    var i = m
    var j = n
    var ops = [EditOp]()

    // Walk back from the bottom-right corner, collecting operations in reverse.
    while i > 0 && j > 0 {
        if dp[i][j] == dp[i - 1][j - 1] && xArray[i - 1] == yArray[j - 1] {
            // Match operation is omitted
            i -= 1
            j -= 1
        } else if dp[i][j] == dp[i - 1][j - 1] + 1 {
            ops.append(EditOp.replace) // Substitution
            i -= 1
            j -= 1
        } else if dp[i][j] == dp[i][j - 1] + 1 {
            ops.append(EditOp.insert) // Insertion
            j -= 1
        } else {
            ops.append(EditOp.delete) // Deletion
            i -= 1
        }
    }

    // Whatever remains of either input can only be deleted or inserted.
    while i > 0 {
        ops.append(EditOp.delete)
        i -= 1
    }
    while j > 0 {
        ops.append(EditOp.insert)
        j -= 1
    }

    return ops.reversed()
}


/// Computes the edit operations that turn `reference` into `s2` by recursive
/// divide and conquer: the first sequence is halved, the cheapest split point
/// of the second sequence is found from two `computeLastRow` score rows, and
/// the two halves are solved independently.
func hirschberg(_ reference: Array<Unicode.Scalar>, _ s2: Array<Unicode.Scalar>) -> [EditOp] {

    /// One divide-and-conquer step on the sub-problem (`x`, `y`).
    func hirschbergRec(_ x: Array<Unicode.Scalar>, _ y: Array<Unicode.Scalar>) -> [EditOp] {
        let xLength = x.count
        let yLength = y.count

        // Base cases: one side is empty, or one side is a single scalar.
        guard xLength > 0 else {
            return [EditOp](repeating: EditOp.insert, count: yLength)
        }
        guard yLength > 0 else {
            return [EditOp](repeating: EditOp.delete, count: xLength)
        }
        guard xLength > 1, yLength > 1 else {
            return needlemanWunsch(x, y)
        }

        let middle = xLength / 2
        let upperHalf = Array(x[..<middle])
        let lowerHalf = Array(x[middle...])

        // Scores of the upper half against prefixes of y, and of the reversed
        // lower half against prefixes of reversed y (i.e. suffixes of y).
        let forwardScores = computeLastRow(upperHalf, y)
        let backwardScores = computeLastRow(Array(lowerHalf.reversed()), Array(y.reversed()))

        // Pick the first split of y with the lowest combined score.
        var splitIndex = 0
        var lowestTotal = Int.max
        for (j, forward) in forwardScores.enumerated() {
            let total = forward + backwardScores[backwardScores.count - 1 - j]
            if total < lowestTotal {
                lowestTotal = total
                splitIndex = j
            }
        }

        let leftOps = hirschbergRec(upperHalf, Array(y[..<splitIndex]))
        let rightOps = hirschbergRec(lowerHalf, Array(y[splitIndex...]))
        return leftOps + rightOps
    }

    return hirschbergRec(reference, s2)
}
Loading