// diff --git a/Tests/WhisperKitTests/Evaluate/Hirschberg.swift b/Tests/WhisperKitTests/Evaluate/Hirschberg.swift
// new file mode 100644
// index 0000000..ca51bda
// --- /dev/null
// +++ b/Tests/WhisperKitTests/Evaluate/Hirschberg.swift
// @@ -0,0 +1,128 @@
import Foundation

/// Computes the last row of the edit-distance dynamic-programming matrix
/// between `s1Chars` and `s2Chars`.
///
/// Only the previous and the current row are kept, so the full matrix is never
/// materialised. Elements are compared with `==` only, hence the generic
/// `Equatable` constraint.
///
/// - Parameters:
///   - s1Chars: Sequence that contributes one matrix row per element.
///   - s2Chars: Sequence that contributes one matrix column per element.
/// - Returns: `s2Chars.count + 1` costs; entry `j` is the edit distance between
///   all of `s1Chars` and the first `j` elements of `s2Chars`. For an empty
///   `s1Chars` this is simply `0...s2Chars.count`.
func computeLastRow<Element: Equatable>(_ s1Chars: [Element], _ s2Chars: [Element]) -> [Int] {
    // Row 0: reaching the first j elements of s2 from nothing costs j insertions.
    var prevRow = Array(0...s2Chars.count)

    // Iterating the elements (instead of `1...count`) keeps empty inputs from
    // building an invalid closed range, which traps at runtime.
    for (rowOffset, s1Element) in s1Chars.enumerated() {
        var currentRow = [Int](repeating: 0, count: s2Chars.count + 1)
        currentRow[0] = rowOffset + 1

        for (columnOffset, s2Element) in s2Chars.enumerated() {
            let j = columnOffset + 1
            let cost = s1Element == s2Element ? 0 : 1
            currentRow[j] = min(
                prevRow[j] + 1,         // Deletion
                currentRow[j - 1] + 1,  // Insertion
                prevRow[j - 1] + cost   // Substitution
            )
        }
        prevRow = currentRow
    }

    return prevRow
}

/// Aligns `xArray` with `yArray` using the full edit-distance matrix and
/// returns the edit operations found by tracing back from the bottom-right cell.
///
/// Matching positions produce no operation. When several predecessors are
/// optimal, the traceback prefers match, then substitution, then insertion,
/// then deletion.
///
/// - Parameters:
///   - xArray: Source sequence; elements consumed without a counterpart yield `.delete`.
///   - yArray: Target sequence; elements produced without a counterpart yield `.insert`.
/// - Returns: The operations in left-to-right order. Empty inputs are handled:
///   an empty `xArray` yields only inserts, an empty `yArray` only deletes.
func needlemanWunsch<Element: Equatable>(_ xArray: [Element], _ yArray: [Element]) -> [EditOp] {
    let m = xArray.count
    let n = yArray.count

    var dp = [[Int]](repeating: [Int](repeating: 0, count: n + 1), count: m + 1)
    // `0...m` / `0...n` are always valid ranges, unlike `1...m` when m == 0.
    for i in 0...m {
        dp[i][0] = i
    }
    for j in 0...n {
        dp[0][j] = j
    }

    // `stride` yields nothing when the upper bound is 0, so empty inputs are safe.
    for i in stride(from: 1, through: m, by: 1) {
        for j in stride(from: 1, through: n, by: 1) {
            let cost = xArray[i - 1] == yArray[j - 1] ? 0 : 1
            dp[i][j] = min(
                dp[i - 1][j] + 1,        // Deletion
                dp[i][j - 1] + 1,        // Insertion
                dp[i - 1][j - 1] + cost  // Substitution
            )
        }
    }

    var i = m
    var j = n
    var ops = [EditOp]()

    // Walk back from (m, n); operations are collected in reverse order.
    while i > 0 && j > 0 {
        if dp[i][j] == dp[i - 1][j - 1] && xArray[i - 1] == yArray[j - 1] {
            // Match operation is omitted
            i -= 1
            j -= 1
        } else if dp[i][j] == dp[i - 1][j - 1] + 1 {
            ops.append(EditOp.replace) // Substitution
            i -= 1
            j -= 1
        } else if dp[i][j] == dp[i][j - 1] + 1 {
            ops.append(EditOp.insert) // Insertion
            j -= 1
        } else {
            ops.append(EditOp.delete) // Deletion
            i -= 1
        }
    }

    // Whatever is left on either side can only be deleted or inserted.
    while i > 0 {
        ops.append(EditOp.delete)
        i -= 1
    }
    while j > 0 {
        ops.append(EditOp.insert)
        j -= 1
    }

    return ops.reversed()
}


// NOTE(review): everything after `x.startIndex..` is missing from this extract,
// as are the generic arguments of `Array` below. The visible part of
// `hirschberg` is reproduced unchanged and must be completed from the original
// file before this compiles.
func hirschberg(_ reference: Array, _ s2: Array) -> [EditOp] {

    func hirschbergRec(_ x: Array, _ y: Array) -> [EditOp] {

        let m = x.endIndex
        let n = y.endIndex

        if m == 0 {
            let result = y.map { _ in EditOp.insert }
            return result
        }
        if n == 0 {
            let result = x.map { _ in EditOp.delete }
            return result
        }
        if m == 1 || n == 1 {
            let result = needlemanWunsch(x, y)
            return result
        }

        let i = m / 2
        let xPrefix = Array(x[x.startIndex..
[String]{
        // NOTE(review): the signature of this function (and the header of its
        // enclosing type) lies outside the visible extract; the body is kept as-is.
        // It collapses every run of two or more whitespace characters into one space.
        // A sentence is silently left out of the result if the regex cannot be built.

        var replacedSentences = [String]()
        for sentence in sentences {
            // Define the pattern you want to replace
            let pattern = "\\s\\s+"

            do {
                let regex = try NSRegularExpression(pattern: pattern, options: [])
                let replacedString = regex.stringByReplacingMatches(
                    in: sentence,
                    options: [],
                    range: NSRange(location: 0, length: sentence.utf16.count),
                    withTemplate: " "
                )
                replacedSentences.append(replacedString)
            } catch {
                print("Error while creating regex: \(error)")
            }
        }
        return replacedSentences
    }

    /// Trims leading and trailing whitespace from every sentence.
    ///
    /// Example: `[" this is an example ", " hello goodbye ", " "]`
    /// becomes `["this is an example", "hello goodbye", ""]`.
    ///
    /// - Parameter sentences: Sentences to trim.
    /// - Returns: One trimmed string per input sentence, in the same order.
    static func strip(sentences: [String]) -> [String]{
        return sentences.map { $0.trimmingCharacters(in: .whitespaces) }
    }

    /// Splits every sentence into its words.
    ///
    /// Example: `["hi", "this is an example"]`
    /// becomes `[["hi"], ["this", "is", "an", "example"]]`.
    ///
    /// - Parameters:
    ///   - sentences: Sentences to split.
    ///   - word_delimiter: Separator between words; empty pieces are discarded.
    /// - Returns: One word list per sentence. Sentences that contain no word
    ///   at all are dropped, so the result may be shorter than the input.
    static func reduceToListOfListOfWords(sentences: [String], word_delimiter: String = " ") -> [[String]]{
        return sentences
            .map { sentence in
                sentence.components(separatedBy: word_delimiter).filter { !$0.isEmpty }
            }
            .filter { !$0.isEmpty }
    }
}
class EnglishNumberNormalizer{
    // Convert any spelled-out numbers into arabic numbers, while handling:
    //
    // - remove any commas
    // - keep the suffixes such as: `1960s`, `274th`, `32nd`, etc.
    // - spell out currency symbols after the number. e.g. `$20 million` -> `20000000 dollars`
    // - spell out `one` and `ones`
    // - interpret successive single-digit numbers as nominal: `one oh one` -> `101`

    // The element type of every `Set` below is `String`, as assigned in `init`;
    // a bare `Set` is not a valid property type.
    let zeros: Set<String>

    // Number words for 1...19 and their plural / ordinal spellings. The tuple
    // holds the numeric value and the suffix to re-attach ("s", "st", "nd", ...).
    let ones: [String:Int]
    let onesPlural: [String:(Int, String)]
    let onesOrdinal: [String:(Int, String)]
    let onesSuffixed: [String:(Int, String)]

    // Same layout for the multiples of ten from 20 to 90.
    let tens: [String:Int]
    let tensPlural: [String:(Int, String)]
    let tensOrdinal: [String:(Int, String)]
    let tensSuffixed: [String:(Int, String)]

    // Same layout for "hundred", "thousand", "million" and "billion".
    let multipliers: [String:Int]
    let multipliersPlural: [String : (Int, String)]
    let multipliersOrdinal: [String : (Int, String)]
    let multipliersSuffixed: [String : (Int, String)]

    let decimals: Set<String>
    let precedingPrefixers: [String:String]
    let followingPrefixers: [String:String]

    let prefixes: Set<String>
    let suffixers: [String:Any]
    let specials: Set<String>
    let words: Set<String>
    let literalWords: Set<String>

    /// Builds all lookup tables. Each table is first assembled in a local
    /// constant because later tables are derived from earlier ones.
    init(){
        let zeros: Set<String> = ["o", "oh", "zero"]

        // "one" -> 1 ... "nineteen" -> 19 (position in the list plus one).
        let oneNames = [
            "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten",
            "eleven", "twelve", "thirteen", "fourteen", "fifteen", "sixteen", "seventeen",
            "eighteen", "nineteen"]
        let ones = Dictionary(uniqueKeysWithValues:
            oneNames.enumerated().map { ($0.element, $0.offset + 1) })
        // "six" needs "es"; every other name simply takes "s".
        let onesPlural = Dictionary(uniqueKeysWithValues:
            ones.map { name, value in
                return (name == "six" ? "sixes" : name + "s", (value, "s"))
            }
        )
        // Irregular ordinals are listed explicitly; the remaining ones are formed
        // by appending "th" ("h" when the name already ends in "t").
        // NOTE(review): this rule spells the ordinal of nine as "nineth" — confirm
        // whether "ninth" should be recognised as well.
        var onesOrdinal: [String: (Int, String)] = [
            "zeroth": (0, "th"),
            "first": (1, "st"),
            "second": (2, "nd"),
            "third": (3, "rd"),
            "fifth": (5, "th"),
            "twelfth": (12, "th")
        ]
        for (name, value) in ones where value > 3 && value != 5 && value != 12 {
            onesOrdinal[name + (name.hasSuffix("t") ? "h" : "th")] = (value, "th")
        }
        let onesSuffixed = onesPlural.merging(onesOrdinal) { $1 }

        let tens = [
            "twenty": 20,
            "thirty": 30,
            "forty": 40,
            "fifty": 50,
            "sixty": 60,
            "seventy": 70,
            "eighty": 80,
            "ninety": 90,
        ]
        // Each name contains a single "y" (the last letter), so replacing "y"
        // turns "twenty" into "twenties" / "twentieth".
        let tensPlural = Dictionary(uniqueKeysWithValues: tens.map { name, value in
            return (name.replacingOccurrences(of: "y", with: "ies"), (value, "s"))
        })
        let tensOrdinal = Dictionary(uniqueKeysWithValues: tens.map { name, value in
            return (name.replacingOccurrences(of: "y", with: "ieth"), (value, "th"))
        })
        let tensSuffixed = tensPlural.merging(tensOrdinal) { $1 }

        //TODO: Figure out a solution for the overflow.
        let multipliers: [String: Int] = [
            "hundred": 100,
            "thousand": 1_000,
            "million": 1_000_000,
            "billion": 1_000_000_000,
            //            "trillion": 1_000_000_000_000,
            //            "quadrillion": 1_000_000_000_000_000,
            //            "quintillion": 1_000_000_000_000_000_000
            //            "sextillion": 1_000_000_000_000_000_000_000,
            //            "septillion": 1_000_000_000_000_000_000_000_000,
            //            "octillion": 1_000_000_000_000_000_000_000_000_000,
            //            "nonillion": 1_000_000_000_000_000_000_000_000_000_000,
            //            "decillion": 1_000_000_000_000_000_000_000_000_000_000_000
        ]
        let multipliersPlural = Dictionary(uniqueKeysWithValues: multipliers.map { name, value in
            return (name + "s", (value, "s"))
        })
        let multipliersOrdinal = Dictionary(uniqueKeysWithValues: multipliers.map { name, value in
            return (name + "th", (value, "th"))
        })
        let multipliersSuffixed = multipliersPlural.merging(multipliersOrdinal) { $1 }

        let decimals: Set<String> = Set(ones.keys).union(tens.keys).union(zeros)
        let precedingPrefixers: [String: String] = [
            "minus": "-",
            "negative": "-",
            "plus": "+",
            "positive": "+"
        ]
        let followingPrefixers: [String: String] = [
            "pound": "£",
            "pounds": "£",
            "euro": "€",
            "euros": "€",
            "dollar": "$",
            "dollars": "$",
            "cent": "¢",
            "cents": "¢"
        ]

        let prefixes = Set(precedingPrefixers.values)
            .union(followingPrefixers.values)
        let suffixers: [String: Any] = [
            "per": ["cent": "%"],
            "percent": "%"
        ]
        let specials: Set<String> = ["and", "double", "triple", "point"]
        // Every word that appears as a key in one of the tables above.
        let words = zeros.union(ones.keys)
            .union(onesSuffixed.keys)
            .union(tens.keys)
            .union(tensSuffixed.keys)
            .union(multipliers.keys)
            .union(multipliersSuffixed.keys)
            .union(precedingPrefixers.keys)
            .union(followingPrefixers.keys)
            .union(suffixers.keys)
            .union(specials)
        let literalWords: Set<String> = ["one", "ones"]

        self.zeros = zeros

        self.ones = ones
        self.onesPlural = onesPlural
        self.onesOrdinal = onesOrdinal
        self.onesSuffixed = onesSuffixed

        self.tens = tens
        self.tensPlural = tensPlural
        self.tensOrdinal = tensOrdinal
        self.tensSuffixed = tensSuffixed

        self.multipliers = multipliers
        self.multipliersPlural = multipliersPlural
        self.multipliersOrdinal = multipliersOrdinal
        self.multipliersSuffixed = multipliersSuffixed

        self.decimals = decimals
        self.precedingPrefixers = precedingPrefixers
        self.followingPrefixers = followingPrefixers

        self.prefixes = prefixes
        self.suffixers = suffixers
        self.specials = specials
        self.words = words
        self.literalWords = literalWords
    }

    // NOTE(review): the body of `processWords` is cut off after `for idx in 0..`
    // in this extract. The visible part is reproduced unchanged and must be
    // completed from the original file.
    func processWords(_ words: [String]) -> [String] {
        var prefix: String? = nil
        var value: String? = nil
        var skip = false
        var results: [String] = []

        func output(_ result: String) -> String {
            var result = result
            if let prefix = prefix {
                result = prefix + result
            }
            value = nil
            prefix = nil
            return result
        }


        for idx in 0..
String {
        // NOTE(review): the signature of this method is missing from the extract
        // (it is invoked as `self.preprocess(text)` further down); the body is kept as-is.
        var results = [String]()

        let segments = s.split(separator: "and a half", omittingEmptySubsequences: false)
        for (i, segment) in segments.enumerated() {
            let trimmedSegment = segment.trimmingCharacters(in: .whitespaces)
            if trimmedSegment.isEmpty {
                continue
            }

            if i == segments.count - 1 {
                results.append(String(trimmedSegment))
            } else {
                results.append(String(trimmedSegment))
                let lastWord = trimmedSegment.split(separator: " ").last ?? ""
                if decimals.contains(String(lastWord)) || multipliers.keys.contains(String(lastWord)) {
                    results.append("point five")
                } else {
                    results.append("and a half")
                }
            }
        }

        var processedString = results.joined(separator: " ")

        // Put a space at number/letter boundary
        processedString = processedString.replacingOccurrences(of: #"([a-z])([0-9])"#, with: "$1 $2", options: .regularExpression)
        processedString = processedString.replacingOccurrences(of: #"([0-9])([a-z])"#, with: "$1 $2", options: .regularExpression)
        // Remove spaces which could be a suffix
        processedString = processedString.replacingOccurrences(of: #"([0-9])\s+(st|nd|rd|th|s)\b"#, with: "$1$2", options: .regularExpression)

        return processedString
    }

    /// Tidies up currency amounts and the digit `1` after number conversion.
    ///
    /// - `"$2 and ¢7"` becomes `"$2.07"` (cents are zero-padded to two digits).
    /// - `"$0.07"` becomes `"¢7"`.
    /// - A standalone `1` / `1s` is written as `one` / `ones`.
    ///
    /// - Parameter s: Text produced by the word-processing step.
    /// - Returns: The rewritten text. A pattern that fails to compile is
    ///   reported with `print` and skipped.
    func postprocess(_ s: String) -> String {
        /// Rewrites every match of `pattern` in `text` with the string returned by
        /// `transform`; a match for which `transform` returns nil is left untouched.
        func rewriting(_ text: String,
                       pattern: String,
                       with transform: (NSTextCheckingResult, String) -> String?) -> String {
            let regex: NSRegularExpression
            do {
                regex = try NSRegularExpression(pattern: pattern)
            } catch {
                print("Error in regex: \(error)")
                return text
            }

            var rewritten = text
            let matches = regex.matches(in: text, range: NSRange(text.startIndex..., in: text))
            // Back to front, so the ranges of earlier matches stay valid after
            // each replacement.
            for match in matches.reversed() {
                guard let range = Range(match.range, in: rewritten),
                      let replacement = transform(match, rewritten) else {
                    continue
                }
                rewritten.replaceSubrange(range, with: replacement)
            }
            return rewritten
        }

        var processedString = s

        // apply currency postprocessing; "$2 and ¢7" -> "$2.07"
        processedString = rewriting(processedString, pattern: #"([€£$])([0-9]+) (?:and )?¢([0-9]{1,2})\b"#) { match, text in
            guard let currencyRange = Range(match.range(at: 1), in: text),
                  let integerRange = Range(match.range(at: 2), in: text),
                  let centsRange = Range(match.range(at: 3), in: text) else {
                return nil
            }
            let cents = Int(String(text[centsRange])) ?? 0
            return "\(text[currencyRange])\(text[integerRange]).\(String(format: "%02d", cents))"
        }

        // "$0.07" -> "¢7". Inside a raw string a literal dot is written `\.`;
        // the previous `\\.` matched a backslash followed by any character, so
        // this rewrite never fired.
        processedString = rewriting(processedString, pattern: #"[€£$]0\.([0-9]{1,2})\b"#) { match, text in
            guard let centsRange = Range(match.range(at: 1), in: text) else {
                return nil
            }
            let cents = Int(String(text[centsRange])) ?? 0
            return "¢\(cents)"
        }

        // write "one(s)" instead of "1(s)", just for readability
        processedString = processedString.replacingOccurrences(of: #"\b1(s?)\b"#, with: "one$1", options: .regularExpression)

        return processedString
    }

    /// Runs the whole pipeline: `preprocess`, `processWords` on the
    /// space-separated non-empty tokens, then `postprocess`.
    func normalize(_ text: String) -> String{
        let preprocessed = self.preprocess(text)
        let tokens = preprocessed.components(separatedBy: " ").filter { !$0.isEmpty }
        let converted = self.processWords(tokens).joined(separator: " ")
        return self.postprocess(converted)
    }

}

class EnglishSpellingNormalizer{
    //
    //Applies British-American spelling mappings as listed in [1].
    //[1] https://www.tysto.com/uk-us-spelling-list.html

    var mapping: [String:String] = [:]

    /// - Parameter englishSpellingMapping: Lookup from a spelling to its replacement.
    init(englishSpellingMapping:[String:String]){
        self.mapping = englishSpellingMapping
    }

    /// Replaces every space-separated word that has an entry in `mapping`;
    /// all other words, and the spacing itself, are kept unchanged.
    func normalize(_ text: String) -> String{
        let words = text.components(separatedBy: " ")
        return words.map { mapping[$0] ?? $0 }.joined(separator: " ")
    }
}

class EnglishTextNormalizer{
    let numberNormalizer: EnglishNumberNormalizer
    let spellingNormalizer: EnglishSpellingNormalizer
    // Filler words that are removed entirely.
    let ignorePatterns = #"\b(hmm|mm|mhm|mmm|uh|um)\b"#
    // Regex pattern -> replacement. `KeyValuePairs` keeps the listed order, which
    // matters: the specific contractions must run before the general ones.
    let replacers: KeyValuePairs<String, String> = [
        // common contractions
        #"\bwon't\b"#: "will not",
        #"\bcan't\b"#: "can not",
        #"\blet's\b"#: "let us",
        #"\bain't\b"#: "aint",
        #"\by'all\b"#: "you all",
        #"\bwanna\b"#: "want to",
        #"\bgotta\b"#: "got to",
        #"\bgonna\b"#: "going to",
        #"\bi'ma\b"#: "i am going to",
        #"\bimma\b"#: "i am going to",
        #"\bwoulda\b"#: "would have",
        #"\bcoulda\b"#: "could have",
        #"\bshoulda\b"#: "should have",
        #"\bma'am\b"#: "madam",
        // contractions in titles/prefixes
        #"\bmr\b"#: "mister ",
        #"\bmrs\b"#: "missus ",
        #"\bst\b"#: "saint ",
        #"\bdr\b"#: "doctor ",
        #"\bprof\b"#: "professor ",
        #"\bcapt\b"#: "captain ",
        #"\bgov\b"#: "governor ",
        #"\bald\b"#: "alderman ",
        #"\bgen\b"#: "general ",
        #"\bsen\b"#: "senator ",
        #"\brep\b"#: "representative ",
        #"\bpres\b"#: "president ",
        #"\brev\b"#: "reverend ",
        #"\bhon\b"#: "honorable ",
        #"\basst\b"#: "assistant ",
        #"\bassoc\b"#: "associate ",
        #"\blt\b"#: "lieutenant ",
        #"\bcol\b"#: "colonel ",
        #"\bjr\b"#: "junior ",
        #"\bsr\b"#: "senior ",
        #"\besq\b"#: "esquire ",
        // perfect tenses, ideally it should be any past participles, but it's harder..
        #"'d been\b"#: " had been",
        #"'s been\b"#: " has been",
        #"'d gone\b"#: " had gone",
        #"'s gone\b"#: " has gone",
        #"'d done\b"#: " had done",  // "'s done" is ambiguous
        #"'s got\b"#: " has got",
        // general contractions
        #"n't\b"#: " not",
        #"'re\b"#: " are",
        #"'s\b"#: " is",
        #"'d\b"#: " would",
        #"'ll\b"#: " will",
        #"'t\b"#: " not",
        #"'ve\b"#: " have",
        #"'m\b"#: " am",
    ]
    // non-ASCII letters that are not separated by "NFKD" normalization
    let ADDITIONAL_DIACRITICS = [
        "œ": "oe",
        "Œ": "OE",
        "ø": "o",
        "Ø": "O",
        "æ": "ae",
        "Æ": "AE",
        "ß": "ss",
        "ẞ": "SS",
        "đ": "d",
        "Đ": "D",
        "ð": "d",
        "Ð": "D",
        "þ": "th",
        "Þ": "th",
        "ł": "l",
        "Ł": "L",
    ]

    init(){
        self.numberNormalizer = EnglishNumberNormalizer()
        self.spellingNormalizer = EnglishSpellingNormalizer(englishSpellingMapping: englishSpellingMappingAbbr)
    }

    /// Normalizes English text: lowercases it, removes bracketed spans and filler
    /// words, expands contractions, strips symbols and diacritics, converts
    /// spelled-out numbers, applies the spelling mapping and collapses whitespace.
    ///
    /// - Parameter text: Raw text.
    /// - Returns: The normalized text.
    func normalize(text: String) -> String{
        var processedText = text.lowercased()

        // remove words between brackets
        processedText.regReplace(pattern: #"[<\[][^>\]]*[>\]]"#, replaceWith: "")
        // remove words between parenthesis
        processedText.regReplace(pattern: #"\(([^)]+?)\)"#, replaceWith: "")
        processedText.regReplace(pattern: self.ignorePatterns, replaceWith: "")
        // standardize when there's a space before an apostrophe
        processedText.regReplace(pattern: #"\s+'"#, replaceWith: "'")

        for (pattern, replacement) in self.replacers{
            processedText.regReplace(pattern: pattern, replaceWith: replacement)
        }

        // remove commas between digits
        processedText.regReplace(pattern: #"(\d),(\d)"#, replaceWith: #"$1$2"#)
        // remove periods not followed by numbers
        processedText.regReplace(pattern: #"\.([^0-9]|$)"#, replaceWith: " $1")
        // keep some symbols for numerics
        processedText = self.removeSymbolsAndDiacritics(text: processedText, keep: ".%$¢€£")
        processedText = self.numberNormalizer.normalize(processedText)
        processedText = self.spellingNormalizer.normalize(processedText)

        // now remove prefix/suffix symbols that are not preceded/followed by numbers
        processedText.regReplace(pattern: #"[.$¢€£]([^0-9])"#, replaceWith: #" $1"#)
        processedText.regReplace(pattern: #"([^0-9])%"#, replaceWith: #"$1 "#)
        // replace any successive whitespace characters with a space
        processedText.regReplace(pattern: #"\s+"#, replaceWith: " ")

        return processedText
    }

    /// Replaces markers, symbols and punctuation with a space and drops
    /// diacritics (general category `Mn` plus the manual `ADDITIONAL_DIACRITICS`
    /// mappings).
    ///
    /// The text is decomposed with the "NFKD" transform and then processed one
    /// unicode scalar at a time. Working on scalars rather than `Character`s is
    /// essential: a base letter and its combining mark form a single `Character`,
    /// so a per-`Character` pass never sees the mark and cannot remove it.
    ///
    /// - Parameters:
    ///   - text: Text to clean.
    ///   - keep: Characters that are passed through unchanged.
    /// - Returns: The cleaned text, or `text` itself if the transform fails.
    func removeSymbolsAndDiacritics(text: String, keep:String="") -> String{
        let keepScalars = Set(keep.unicodeScalars)
        let categoriesToReplaceWithSpace: [Unicode.GeneralCategory] = [
            .nonspacingMark,
            .spacingMark,
            .enclosingMark,
            .mathSymbol,
            .otherSymbol,
            .currencySymbol,
            .modifierSymbol,
            .dashPunctuation,
            .openPunctuation,
            .closePunctuation,
            .finalPunctuation,
            .otherPunctuation,
            .initialPunctuation,
            .connectorPunctuation
        ]

        func replacement(for scalar: Unicode.Scalar) -> String {
            if keepScalars.contains(scalar) {
                return String(scalar)
            }
            if let mapped = self.ADDITIONAL_DIACRITICS[String(scalar)] {
                return mapped
            }
            let category = scalar.properties.generalCategory
            // Combining marks are dropped outright; this check has to precede the
            // space replacement because `.nonspacingMark` is also in that list.
            if category == .nonspacingMark {
                return ""
            }
            if categoriesToReplaceWithSpace.contains(category) {
                return " "
            }
            return String(scalar)
        }

        guard let normalizedString = text.applyingTransform(StringTransform(rawValue: "NFKD"), reverse: false) else {
            return text
        }
        return normalizedString.unicodeScalars.map { replacement(for: $0) }.joined(separator: "")
    }
}

private extension String {
    /// Replaces every match of `pattern` (case-insensitive, `^`/`$` match at line
    /// boundaries) with the template `replaceWith`, in place.
    ///
    /// If the pattern does not compile the string is left unchanged and the
    /// error is printed, matching how the other regex helpers in this file
    /// report failures.
    mutating func regReplace(pattern: String, replaceWith: String = "") {
        do {
            let regex = try NSRegularExpression(pattern: pattern, options: [.caseInsensitive, .anchorsMatchLines])
            let range = NSRange(self.startIndex..., in: self)
            self = regex.stringByReplacingMatches(in: self, options: [], range: range, withTemplate: replaceWith)
        } catch {
            print("Error in regex: \(error)")
        }
    }
}

private extension Double{
    /// Returns true when the value lies within `tolerance` of a whole number,
    /// i.e. its fractional part is almost 0 or almost 1.
    func isDenominatorCloseToOne(tolerance: Double = 1e-9) -> Bool {
        let fractionalPart = self - floor(self)
        return fractionalPart < tolerance || fractionalPart > (1 - tolerance)
    }
}

private extension Decimal {
    /// True when the value equals its own `floored()` value.
    var isInteger: Bool {
        return self == self.floored()
    }

    /// Rounds to scale 0 using `NSDecimalNumber`'s `.down` rounding mode.
    func floored() -> Decimal {
        let nsDecimalNumber = NSDecimalNumber(decimal: self)
        let flooredNumber = nsDecimalNumber.rounding(
            accordingToBehavior: NSDecimalNumberHandler(
                roundingMode: .down,
                scale: 0,
                raiseOnExactness: false,
                raiseOnOverflow: false,
                raiseOnUnderflow: false,
                raiseOnDivideByZero: false
            )
        )
        return flooredNumber.decimalValue
    }

    /// String form of the value, as produced by string interpolation.
    func toString() -> String {
        return "\(self)"
    }

    /// Everything before the first "." of `toString()`, or "0" if that is empty.
    func integerPart() -> String{
        return String(self.toString().split(separator: ".").first ?? "0")
    }

    /// Computes `self - q * divisor`, where `q` is `self / divisor` rounded to
    /// scale 0 with the `.down` rounding mode.
    func remainder(dividingBy divisor: Decimal) -> Decimal {
        let decimalNumber = NSDecimalNumber(decimal: self)
        let divisorNumber = NSDecimalNumber(decimal: divisor)

        let quotient = decimalNumber.dividing(by: divisorNumber, withBehavior: nil)
        let roundedQuotient = quotient.rounding(accordingToBehavior: NSDecimalNumberHandler(roundingMode: .down, scale: 0, raiseOnExactness: false, raiseOnOverflow: false, raiseOnUnderflow: false, raiseOnDivideByZero: false))

        let product = roundedQuotient.multiplying(by: divisorNumber)
        let remainder = decimalNumber.subtracting(product)

        return remainder.decimalValue
    }
}
// diff --git a/Tests/WhisperKitTests/Evaluate/SpellingMapping.swift b/Tests/WhisperKitTests/Evaluate/SpellingMapping.swift
// new file mode 100644
// index 0000000..765eb5f
// --- /dev/null
// +++ b/Tests/WhisperKitTests/Evaluate/SpellingMapping.swift
// @@ -0,0 +1,1743 @@
// https://github.com/argmaxinc/whisperkittools/blob/main/whisperkit/evaluate/abbreviations_en.py See abbr
let englishSpellingMappingAbbr = [
    "accessorise": "accessorize",
    "accessorised": "accessorized",
    "accessorises": "accessorizes",
    "accessorising": "accessorizing",
    "acclimatisation": "acclimatization",
    "acclimatise": "acclimatize",
    "acclimatised": "acclimatized",
    "acclimatises": "acclimatizes",
    "acclimatising": "acclimatizing",
    "accoutrements": "accouterments",
    "aeon": "eon",
    "aeons": "eons",
    "aerogramme": "aerogram",
    "aerogrammes": "aerograms",
    "aeroplane": "airplane",
    "aeroplanes": "airplanes",
    "aesthete": "esthete",
    "aesthetes": "esthetes",
    "aesthetic": "esthetic",
    "aesthetically": "esthetically",
    "aesthetics": "esthetics",
    "aetiology": "etiology",
    "ageing": "aging",
    "aggrandisement": "aggrandizement",
    "agonise": "agonize",
    "agonised": "agonized",
    "agonises": "agonizes",
    "agonising": "agonizing",
    "agonisingly": "agonizingly",
    "almanack": "almanac",
    "almanacks": "almanacs",
    "aluminium": "aluminum",
    "amortisable":
"amortizable", + "amortisation": "amortization", + "amortisations": "amortizations", + "amortise": "amortize", + "amortised": "amortized", + "amortises": "amortizes", + "amortising": "amortizing", + "amphitheatre": "amphitheater", + "amphitheatres": "amphitheaters", + "anaemia": "anemia", + "anaemic": "anemic", + "anaesthesia": "anesthesia", + "anaesthetic": "anesthetic", + "anaesthetics": "anesthetics", + "anaesthetise": "anesthetize", + "anaesthetised": "anesthetized", + "anaesthetises": "anesthetizes", + "anaesthetising": "anesthetizing", + "anaesthetist": "anesthetist", + "anaesthetists": "anesthetists", + "anaesthetize": "anesthetize", + "anaesthetized": "anesthetized", + "anaesthetizes": "anesthetizes", + "anaesthetizing": "anesthetizing", + "analogue": "analog", + "analogues": "analogs", + "analyse": "analyze", + "analysed": "analyzed", + "analyses": "analyzes", + "analysing": "analyzing", + "anglicise": "anglicize", + "anglicised": "anglicized", + "anglicises": "anglicizes", + "anglicising": "anglicizing", + "annualised": "annualized", + "antagonise": "antagonize", + "antagonised": "antagonized", + "antagonises": "antagonizes", + "antagonising": "antagonizing", + "apologise": "apologize", + "apologised": "apologized", + "apologises": "apologizes", + "apologising": "apologizing", + "appal": "appall", + "appals": "appalls", + "appetiser": "appetizer", + "appetisers": "appetizers", + "appetising": "appetizing", + "appetisingly": "appetizingly", + "arbour": "arbor", + "arbours": "arbors", + "archaeologically": "archeologically", + "archaeologist": "archeologist", + "archaeologists": "archeologists", + "archaeology": "archeology", + "archeological": "archaeological", + "ardour": "ardor", + "armour": "armor", + "armoured": "armored", + "armourer": "armorer", + "armourers": "armorers", + "armouries": "armories", + "armoury": "armory", + "artefact": "artifact", + "artefacts": "artifacts", + "authorise": "authorize", + "authorised": "authorized", + "authorises": 
"authorizes", + "authorising": "authorizing", + "axe": "ax", + "backpedalled": "backpedaled", + "backpedalling": "backpedaling", + "bannister": "banister", + "bannisters": "banisters", + "baptise": "baptize", + "baptised": "baptized", + "baptises": "baptizes", + "baptising": "baptizing", + "bastardise": "bastardize", + "bastardised": "bastardized", + "bastardises": "bastardizes", + "bastardising": "bastardizing", + "battleax": "battleaxe", + "baulk": "balk", + "baulked": "balked", + "baulking": "balking", + "baulks": "balks", + "bedevilled": "bedeviled", + "bedevilling": "bedeviling", + "behaviour": "behavior", + "behavioural": "behavioral", + "behaviourism": "behaviorism", + "behaviourist": "behaviorist", + "behaviourists": "behaviorists", + "behaviours": "behaviors", + "behove": "behoove", + "behoved": "behooved", + "behoves": "behooves", + "bejewelled": "bejeweled", + "belabour": "belabor", + "belaboured": "belabored", + "belabouring": "belaboring", + "belabours": "belabors", + "bevelled": "beveled", + "bevvies": "bevies", + "bevvy": "bevy", + "biassed": "biased", + "biassing": "biasing", + "bingeing": "binging", + "bougainvillaea": "bougainvillea", + "bougainvillaeas": "bougainvilleas", + "bowdlerise": "bowdlerize", + "bowdlerised": "bowdlerized", + "bowdlerises": "bowdlerizes", + "bowdlerising": "bowdlerizing", + "breathalyse": "breathalyze", + "breathalysed": "breathalyzed", + "breathalyser": "breathalyzer", + "breathalysers": "breathalyzers", + "breathalyses": "breathalyzes", + "breathalysing": "breathalyzing", + "brutalise": "brutalize", + "brutalised": "brutalized", + "brutalises": "brutalizes", + "brutalising": "brutalizing", + "busses": "buses", + "bussing": "busing", + "caesarean": "cesarean", + "caesareans": "cesareans", + "calibre": "caliber", + "calibres": "calibers", + "calliper": "caliper", + "callipers": "calipers", + "callisthenics": "calisthenics", + "canalise": "canalize", + "canalised": "canalized", + "canalises": "canalizes", + "canalising": 
"canalizing", + "cancelation": "cancellation", + "cancelations": "cancellations", + "cancelled": "canceled", + "cancelling": "canceling", + "candour": "candor", + "cannibalise": "cannibalize", + "cannibalised": "cannibalized", + "cannibalises": "cannibalizes", + "cannibalising": "cannibalizing", + "canonise": "canonize", + "canonised": "canonized", + "canonises": "canonizes", + "canonising": "canonizing", + "capitalise": "capitalize", + "capitalised": "capitalized", + "capitalises": "capitalizes", + "capitalising": "capitalizing", + "caramelise": "caramelize", + "caramelised": "caramelized", + "caramelises": "caramelizes", + "caramelising": "caramelizing", + "carbonise": "carbonize", + "carbonised": "carbonized", + "carbonises": "carbonizes", + "carbonising": "carbonizing", + "carolled": "caroled", + "carolling": "caroling", + "catalogue": "catalog", + "catalogued": "cataloged", + "catalogues": "catalogs", + "cataloguing": "cataloging", + "catalyse": "catalyze", + "catalysed": "catalyzed", + "catalyses": "catalyzes", + "catalysing": "catalyzing", + "categorise": "categorize", + "categorised": "categorized", + "categorises": "categorizes", + "categorising": "categorizing", + "cauterise": "cauterize", + "cauterised": "cauterized", + "cauterises": "cauterizes", + "cauterising": "cauterizing", + "cavilled": "caviled", + "cavilling": "caviling", + "centigramme": "centigram", + "centigrammes": "centigrams", + "centilitre": "centiliter", + "centilitres": "centiliters", + "centimetre": "centimeter", + "centimetres": "centimeters", + "centralise": "centralize", + "centralised": "centralized", + "centralises": "centralizes", + "centralising": "centralizing", + "centre": "center", + "centred": "centered", + "centrefold": "centerfold", + "centrefolds": "centerfolds", + "centrepiece": "centerpiece", + "centrepieces": "centerpieces", + "centres": "centers", + "channelled": "channeled", + "channelling": "channeling", + "characterise": "characterize", + "characterised": 
"characterized", + "characterises": "characterizes", + "characterising": "characterizing", + "cheque": "check", + "chequebook": "checkbook", + "chequebooks": "checkbooks", + "chequered": "checkered", + "cheques": "checks", + "chilli": "chili", + "chimaera": "chimera", + "chimaeras": "chimeras", + "chiselled": "chiseled", + "chiselling": "chiseling", + "circularise": "circularize", + "circularised": "circularized", + "circularises": "circularizes", + "circularising": "circularizing", + "civilise": "civilize", + "civilised": "civilized", + "civilises": "civilizes", + "civilising": "civilizing", + "clamour": "clamor", + "clamoured": "clamored", + "clamouring": "clamoring", + "clamours": "clamors", + "clangour": "clangor", + "clarinettist": "clarinetist", + "clarinettists": "clarinetists", + "collectivise": "collectivize", + "collectivised": "collectivized", + "collectivises": "collectivizes", + "collectivising": "collectivizing", + "colonisation": "colonization", + "colonise": "colonize", + "colonised": "colonized", + "coloniser": "colonizer", + "colonisers": "colonizers", + "colonises": "colonizes", + "colonising": "colonizing", + "colour": "color", + "colourant": "colorant", + "colourants": "colorants", + "coloured": "colored", + "coloureds": "coloreds", + "colourful": "colorful", + "colourfully": "colorfully", + "colouring": "coloring", + "colourize": "colorize", + "colourized": "colorized", + "colourizes": "colorizes", + "colourizing": "colorizing", + "colourless": "colorless", + "colours": "colors", + "commercialise": "commercialize", + "commercialised": "commercialized", + "commercialises": "commercializes", + "commercialising": "commercializing", + "compartmentalise": "compartmentalize", + "compartmentalised": "compartmentalized", + "compartmentalises": "compartmentalizes", + "compartmentalising": "compartmentalizing", + "computerise": "computerize", + "computerised": "computerized", + "computerises": "computerizes", + "computerising": "computerizing", + 
"conceptualise": "conceptualize", + "conceptualised": "conceptualized", + "conceptualises": "conceptualizes", + "conceptualising": "conceptualizing", + "connexion": "connection", + "connexions": "connections", + "contextualise": "contextualize", + "contextualised": "contextualized", + "contextualises": "contextualizes", + "contextualising": "contextualizing", + "cosier": "cozier", + "cosies": "cozies", + "cosiest": "coziest", + "cosily": "cozily", + "cosiness": "coziness", + "cosy": "cozy", + "councillor": "councilor", + "councillors": "councilors", + "counselled": "counseled", + "counselling": "counseling", + "counsellor": "counselor", + "counsellors": "counselors", + "crenelated": "crenellated", + "criminalise": "criminalize", + "criminalised": "criminalized", + "criminalises": "criminalizes", + "criminalising": "criminalizing", + "criticise": "criticize", + "criticised": "criticized", + "criticises": "criticizes", + "criticising": "criticizing", + "crueller": "crueler", + "cruellest": "cruelest", + "crystallisation": "crystallization", + "crystallise": "crystallize", + "crystallised": "crystallized", + "crystallises": "crystallizes", + "crystallising": "crystallizing", + "cudgelled": "cudgeled", + "cudgelling": "cudgeling", + "customise": "customize", + "customised": "customized", + "customises": "customizes", + "customising": "customizing", + "cypher": "cipher", + "cyphers": "ciphers", + "decentralisation": "decentralization", + "decentralise": "decentralize", + "decentralised": "decentralized", + "decentralises": "decentralizes", + "decentralising": "decentralizing", + "decriminalisation": "decriminalization", + "decriminalise": "decriminalize", + "decriminalised": "decriminalized", + "decriminalises": "decriminalizes", + "decriminalising": "decriminalizing", + "defence": "defense", + "defenceless": "defenseless", + "defences": "defenses", + "dehumanisation": "dehumanization", + "dehumanise": "dehumanize", + "dehumanised": "dehumanized", + "dehumanises": 
"dehumanizes", + "dehumanising": "dehumanizing", + "demeanour": "demeanor", + "demilitarisation": "demilitarization", + "demilitarise": "demilitarize", + "demilitarised": "demilitarized", + "demilitarises": "demilitarizes", + "demilitarising": "demilitarizing", + "demobilisation": "demobilization", + "demobilise": "demobilize", + "demobilised": "demobilized", + "demobilises": "demobilizes", + "demobilising": "demobilizing", + "democratisation": "democratization", + "democratise": "democratize", + "democratised": "democratized", + "democratises": "democratizes", + "democratising": "democratizing", + "demonise": "demonize", + "demonised": "demonized", + "demonises": "demonizes", + "demonising": "demonizing", + "demoralisation": "demoralization", + "demoralise": "demoralize", + "demoralised": "demoralized", + "demoralises": "demoralizes", + "demoralising": "demoralizing", + "denationalisation": "denationalization", + "denationalise": "denationalize", + "denationalised": "denationalized", + "denationalises": "denationalizes", + "denationalising": "denationalizing", + "deodorise": "deodorize", + "deodorised": "deodorized", + "deodorises": "deodorizes", + "deodorising": "deodorizing", + "depersonalise": "depersonalize", + "depersonalised": "depersonalized", + "depersonalises": "depersonalizes", + "depersonalising": "depersonalizing", + "deputise": "deputize", + "deputised": "deputized", + "deputises": "deputizes", + "deputising": "deputizing", + "desensitisation": "desensitization", + "desensitise": "desensitize", + "desensitised": "desensitized", + "desensitises": "desensitizes", + "desensitising": "desensitizing", + "destabilisation": "destabilization", + "destabilise": "destabilize", + "destabilised": "destabilized", + "destabilises": "destabilizes", + "destabilising": "destabilizing", + "dialled": "dialed", + "dialling": "dialing", + "dialogue": "dialog", + "dialogues": "dialogs", + "diarrhoea": "diarrhea", + "digitise": "digitize", + "digitised": "digitized", + 
"digitises": "digitizes", + "digitising": "digitizing", + "disc": "disk", + "discolour": "discolor", + "discoloured": "discolored", + "discolouring": "discoloring", + "discolours": "discolors", + "discs": "disks", + "disembowelled": "disemboweled", + "disembowelling": "disemboweling", + "disfavour": "disfavor", + "dishevelled": "disheveled", + "dishonour": "dishonor", + "dishonourable": "dishonorable", + "dishonourably": "dishonorably", + "dishonoured": "dishonored", + "dishonouring": "dishonoring", + "dishonours": "dishonors", + "disorganisation": "disorganization", + "disorganised": "disorganized", + "distil": "distill", + "distils": "distills", + "dramatisation": "dramatization", + "dramatisations": "dramatizations", + "dramatise": "dramatize", + "dramatised": "dramatized", + "dramatises": "dramatizes", + "dramatising": "dramatizing", + "draught": "draft", + "draughtboard": "draftboard", + "draughtboards": "draftboards", + "draughtier": "draftier", + "draughtiest": "draftiest", + "draughts": "drafts", + "draughtsman": "draftsman", + "draughtsmanship": "draftsmanship", + "draughtsmen": "draftsmen", + "draughtswoman": "draftswoman", + "draughtswomen": "draftswomen", + "draughty": "drafty", + "drivelled": "driveled", + "drivelling": "driveling", + "duelled": "dueled", + "duelling": "dueling", + "economise": "economize", + "economised": "economized", + "economises": "economizes", + "economising": "economizing", + "editorialise": "editorialize", + "editorialised": "editorialized", + "editorialises": "editorializes", + "editorialising": "editorializing", + "edoema": "edema", + "empathise": "empathize", + "empathised": "empathized", + "empathises": "empathizes", + "empathising": "empathizing", + "emphasise": "emphasize", + "emphasised": "emphasized", + "emphasises": "emphasizes", + "emphasising": "emphasizing", + "enamelled": "enameled", + "enamelling": "enameling", + "enamoured": "enamored", + "encyclopaedia": "encyclopedia", + "encyclopaedias": "encyclopedias", + 
"encyclopaedic": "encyclopedic", + "endeavour": "endeavor", + "endeavoured": "endeavored", + "endeavouring": "endeavoring", + "endeavours": "endeavors", + "energise": "energize", + "energised": "energized", + "energises": "energizes", + "energising": "energizing", + "enrol": "enroll", + "enrols": "enrolls", + "enthral": "enthrall", + "enthrals": "enthralls", + "epaulette": "epaulet", + "epaulettes": "epaulets", + "epicentre": "epicenter", + "epicentres": "epicenters", + "epilogue": "epilog", + "epilogues": "epilogs", + "epitomise": "epitomize", + "epitomised": "epitomized", + "epitomises": "epitomizes", + "epitomising": "epitomizing", + "equalisation": "equalization", + "equalise": "equalize", + "equalised": "equalized", + "equaliser": "equalizer", + "equalisers": "equalizers", + "equalises": "equalizes", + "equalising": "equalizing", + "eulogise": "eulogize", + "eulogised": "eulogized", + "eulogises": "eulogizes", + "eulogising": "eulogizing", + "evangelise": "evangelize", + "evangelised": "evangelized", + "evangelises": "evangelizes", + "evangelising": "evangelizing", + "exorcise": "exorcize", + "exorcised": "exorcized", + "exorcises": "exorcizes", + "exorcising": "exorcizing", + "extemporisation": "extemporization", + "extemporise": "extemporize", + "extemporised": "extemporized", + "extemporises": "extemporizes", + "extemporising": "extemporizing", + "externalisation": "externalization", + "externalisations": "externalizations", + "externalise": "externalize", + "externalised": "externalized", + "externalises": "externalizes", + "externalising": "externalizing", + "factorise": "factorize", + "factorised": "factorized", + "factorises": "factorizes", + "factorising": "factorizing", + "faecal": "fecal", + "faeces": "feces", + "familiarisation": "familiarization", + "familiarise": "familiarize", + "familiarised": "familiarized", + "familiarises": "familiarizes", + "familiarising": "familiarizing", + "fantasise": "fantasize", + "fantasised": "fantasized", + 
"fantasises": "fantasizes", + "fantasising": "fantasizing", + "favour": "favor", + "favourable": "favorable", + "favourably": "favorably", + "favoured": "favored", + "favouring": "favoring", + "favourite": "favorite", + "favourites": "favorites", + "favouritism": "favoritism", + "favours": "favors", + "feminise": "feminize", + "feminised": "feminized", + "feminises": "feminizes", + "feminising": "feminizing", + "fertilisation": "fertilization", + "fertilise": "fertilize", + "fertilised": "fertilized", + "fertiliser": "fertilizer", + "fertilisers": "fertilizers", + "fertilises": "fertilizes", + "fertilising": "fertilizing", + "fervour": "fervor", + "fibre": "fiber", + "fibreglass": "fiberglass", + "fibres": "fibers", + "fictionalisation": "fictionalization", + "fictionalisations": "fictionalizations", + "fictionalise": "fictionalize", + "fictionalised": "fictionalized", + "fictionalises": "fictionalizes", + "fictionalising": "fictionalizing", + "fillet": "filet", + "filleted": "fileted", + "filleting": "fileting", + "fillets": "filets", + "finalisation": "finalization", + "finalise": "finalize", + "finalised": "finalized", + "finalises": "finalizes", + "finalising": "finalizing", + "flautist": "flutist", + "flautists": "flutists", + "flavour": "flavor", + "flavoured": "flavored", + "flavouring": "flavoring", + "flavourings": "flavorings", + "flavourless": "flavorless", + "flavours": "flavors", + "flavoursome": "flavorsome", + "flyer / flier": "flier / flyer", + "foetal": "fetal", + "foetid": "fetid", + "foetus": "fetus", + "foetuses": "fetuses", + "formalisation": "formalization", + "formalise": "formalize", + "formalised": "formalized", + "formalises": "formalizes", + "formalising": "formalizing", + "fossilisation": "fossilization", + "fossilise": "fossilize", + "fossilised": "fossilized", + "fossilises": "fossilizes", + "fossilising": "fossilizing", + "fraternisation": "fraternization", + "fraternise": "fraternize", + "fraternised": "fraternized", + "fraternises": 
"fraternizes", + "fraternising": "fraternizing", + "fulfil": "fulfill", + "fulfilment": "fulfillment", + "fulfils": "fulfills", + "funnelled": "funneled", + "funnelling": "funneling", + "gage": "gauge", + "gaged": "gauged", + "gages": "gauges", + "gaging": "gauging", + "galvanise": "galvanize", + "galvanised": "galvanized", + "galvanises": "galvanizes", + "galvanising": "galvanizing", + "gambolled": "gamboled", + "gambolling": "gamboling", + "gaol": "jail", + "gaolbird": "jailbird", + "gaolbirds": "jailbirds", + "gaolbreak": "jailbreak", + "gaolbreaks": "jailbreaks", + "gaoled": "jailed", + "gaoler": "jailer", + "gaolers": "jailers", + "gaoling": "jailing", + "gaols": "jails", + "gasses": "gases", + "generalisation": "generalization", + "generalisations": "generalizations", + "generalise": "generalize", + "generalised": "generalized", + "generalises": "generalizes", + "generalising": "generalizing", + "ghettoise": "ghettoize", + "ghettoised": "ghettoized", + "ghettoises": "ghettoizes", + "ghettoising": "ghettoizing", + "gipsies": "gypsies", + "glamor": "glamour", + "glamorise": "glamorize", + "glamorised": "glamorized", + "glamorises": "glamorizes", + "glamorising": "glamorizing", + "globalisation": "globalization", + "globalise": "globalize", + "globalised": "globalized", + "globalises": "globalizes", + "globalising": "globalizing", + "glueing": "gluing", + "goitre": "goiter", + "goitres": "goiters", + "gonorrhoea": "gonorrhea", + "gramme": "gram", + "grammes": "grams", + "gravelled": "graveled", + "grey": "gray", + "greyed": "grayed", + "greying": "graying", + "greyish": "grayish", + "greyness": "grayness", + "greys": "grays", + "grovelled": "groveled", + "grovelling": "groveling", + "groyne": "groin", + "groynes": "groins", + "gruelling": "grueling", + "gruellingly": "gruelingly", + "gryphon": "griffin", + "gryphons": "griffins", + "gynaecological": "gynecological", + "gynaecologist": "gynecologist", + "gynaecologists": "gynecologists", + "gynaecology": 
"gynecology", + "haematological": "hematological", + "haematologist": "hematologist", + "haematologists": "hematologists", + "haematology": "hematology", + "haemoglobin": "hemoglobin", + "haemophilia": "hemophilia", + "haemophiliac": "hemophiliac", + "haemophiliacs": "hemophiliacs", + "haemorrhage": "hemorrhage", + "haemorrhaged": "hemorrhaged", + "haemorrhages": "hemorrhages", + "haemorrhaging": "hemorrhaging", + "haemorrhoids": "hemorrhoids", + "harbour": "harbor", + "harboured": "harbored", + "harbouring": "harboring", + "harbours": "harbors", + "harmonisation": "harmonization", + "harmonise": "harmonize", + "harmonised": "harmonized", + "harmonises": "harmonizes", + "harmonising": "harmonizing", + "homoeopath": "homeopath", + "homoeopathic": "homeopathic", + "homoeopaths": "homeopaths", + "homoeopathy": "homeopathy", + "homogenise": "homogenize", + "homogenised": "homogenized", + "homogenises": "homogenizes", + "homogenising": "homogenizing", + "honour": "honor", + "honourable": "honorable", + "honourably": "honorably", + "honoured": "honored", + "honouring": "honoring", + "honours": "honors", + "hospitalisation": "hospitalization", + "hospitalise": "hospitalize", + "hospitalised": "hospitalized", + "hospitalises": "hospitalizes", + "hospitalising": "hospitalizing", + "humanise": "humanize", + "humanised": "humanized", + "humanises": "humanizes", + "humanising": "humanizing", + "humour": "humor", + "humoured": "humored", + "humouring": "humoring", + "humourless": "humorless", + "humours": "humors", + "hybridise": "hybridize", + "hybridised": "hybridized", + "hybridises": "hybridizes", + "hybridising": "hybridizing", + "hypnotise": "hypnotize", + "hypnotised": "hypnotized", + "hypnotises": "hypnotizes", + "hypnotising": "hypnotizing", + "hypothesise": "hypothesize", + "hypothesised": "hypothesized", + "hypothesises": "hypothesizes", + "hypothesising": "hypothesizing", + "idealisation": "idealization", + "idealise": "idealize", + "idealised": "idealized", + 
"idealises": "idealizes", + "idealising": "idealizing", + "idolise": "idolize", + "idolised": "idolized", + "idolises": "idolizes", + "idolising": "idolizing", + "immobilisation": "immobilization", + "immobilise": "immobilize", + "immobilised": "immobilized", + "immobiliser": "immobilizer", + "immobilisers": "immobilizers", + "immobilises": "immobilizes", + "immobilising": "immobilizing", + "immortalise": "immortalize", + "immortalised": "immortalized", + "immortalises": "immortalizes", + "immortalising": "immortalizing", + "immunisation": "immunization", + "immunise": "immunize", + "immunised": "immunized", + "immunises": "immunizes", + "immunising": "immunizing", + "impanelled": "impaneled", + "impanelling": "impaneling", + "imperilled": "imperiled", + "imperilling": "imperiling", + "individualise": "individualize", + "individualised": "individualized", + "individualises": "individualizes", + "individualising": "individualizing", + "industrialise": "industrialize", + "industrialised": "industrialized", + "industrialises": "industrializes", + "industrialising": "industrializing", + "inflexion": "inflection", + "inflexions": "inflections", + "initialise": "initialize", + "initialised": "initialized", + "initialises": "initializes", + "initialising": "initializing", + "initialled": "initialed", + "initialling": "initialing", + "instal": "install", + "instalment": "installment", + "instalments": "installments", + "instals": "installs", + "instil": "instill", + "instils": "instills", + "institutionalisation": "institutionalization", + "institutionalise": "institutionalize", + "institutionalised": "institutionalized", + "institutionalises": "institutionalizes", + "institutionalising": "institutionalizing", + "intellectualise": "intellectualize", + "intellectualised": "intellectualized", + "intellectualises": "intellectualizes", + "intellectualising": "intellectualizing", + "internalisation": "internalization", + "internalise": "internalize", + "internalised": 
"internalized", + "internalises": "internalizes", + "internalising": "internalizing", + "internationalisation": "internationalization", + "internationalise": "internationalize", + "internationalised": "internationalized", + "internationalises": "internationalizes", + "internationalising": "internationalizing", + "ionisation": "ionization", + "ionise": "ionize", + "ionised": "ionized", + "ioniser": "ionizer", + "ionisers": "ionizers", + "ionises": "ionizes", + "ionising": "ionizing", + "italicise": "italicize", + "italicised": "italicized", + "italicises": "italicizes", + "italicising": "italicizing", + "itemise": "itemize", + "itemised": "itemized", + "itemises": "itemizes", + "itemising": "itemizing", + "jeopardise": "jeopardize", + "jeopardised": "jeopardized", + "jeopardises": "jeopardizes", + "jeopardising": "jeopardizing", + "jewelled": "jeweled", + "jeweller": "jeweler", + "jewellers": "jewelers", + "jewellery": "jewelry", + "judgement": "judgment", + "kilogramme": "kilogram", + "kilogrammes": "kilograms", + "kilometre": "kilometer", + "kilometres": "kilometers", + "labelled": "labeled", + "labelling": "labeling", + "labour": "labor", + "laboured": "labored", + "labourer": "laborer", + "labourers": "laborers", + "labouring": "laboring", + "labours": "labors", + "lacklustre": "lackluster", + "legalisation": "legalization", + "legalise": "legalize", + "legalised": "legalized", + "legalises": "legalizes", + "legalising": "legalizing", + "legitimise": "legitimize", + "legitimised": "legitimized", + "legitimises": "legitimizes", + "legitimising": "legitimizing", + "leukaemia": "leukemia", + "levelled": "leveled", + "leveller": "leveler", + "levellers": "levelers", + "levelling": "leveling", + "libelled": "libeled", + "libelling": "libeling", + "libellous": "libelous", + "liberalisation": "liberalization", + "liberalise": "liberalize", + "liberalised": "liberalized", + "liberalises": "liberalizes", + "liberalising": "liberalizing", + "licence": "license", + 
"licenced": "licensed", + "licences": "licenses", + "licencing": "licensing", + "likeable": "likable", + "lionisation": "lionization", + "lionise": "lionize", + "lionised": "lionized", + "lionises": "lionizes", + "lionising": "lionizing", + "liquidise": "liquidize", + "liquidised": "liquidized", + "liquidiser": "liquidizer", + "liquidisers": "liquidizers", + "liquidises": "liquidizes", + "liquidising": "liquidizing", + "litre": "liter", + "litres": "liters", + "localise": "localize", + "localised": "localized", + "localises": "localizes", + "localising": "localizing", + "louvre": "louver", + "louvred": "louvered", + "louvres": "louvers", + "lustre": "luster", + "magnetise": "magnetize", + "magnetised": "magnetized", + "magnetises": "magnetizes", + "magnetising": "magnetizing", + "manoeuvrability": "maneuverability", + "manoeuvrable": "maneuverable", + "manoeuvre": "maneuver", + "manoeuvred": "maneuvered", + "manoeuvres": "maneuvers", + "manoeuvring": "maneuvering", + "manoeuvrings": "maneuverings", + "marginalisation": "marginalization", + "marginalise": "marginalize", + "marginalised": "marginalized", + "marginalises": "marginalizes", + "marginalising": "marginalizing", + "marshalled": "marshaled", + "marshalling": "marshaling", + "marvelled": "marveled", + "marvelling": "marveling", + "marvellous": "marvelous", + "marvellously": "marvelously", + "materialisation": "materialization", + "materialise": "materialize", + "materialised": "materialized", + "materialises": "materializes", + "materialising": "materializing", + "maximisation": "maximization", + "maximise": "maximize", + "maximised": "maximized", + "maximises": "maximizes", + "maximising": "maximizing", + "meagre": "meager", + "mechanisation": "mechanization", + "mechanise": "mechanize", + "mechanised": "mechanized", + "mechanises": "mechanizes", + "mechanising": "mechanizing", + "mediaeval": "medieval", + "memorialise": "memorialize", + "memorialised": "memorialized", + "memorialises": "memorializes", + 
"memorialising": "memorializing", + "memorise": "memorize", + "memorised": "memorized", + "memorises": "memorizes", + "memorising": "memorizing", + "mesmerise": "mesmerize", + "mesmerised": "mesmerized", + "mesmerises": "mesmerizes", + "mesmerising": "mesmerizing", + "metabolise": "metabolize", + "metabolised": "metabolized", + "metabolises": "metabolizes", + "metabolising": "metabolizing", + "metre": "meter", + "metres": "meters", + "mhm": "hmm", + "micrometre": "micrometer", + "micrometres": "micrometers", + "militarise": "militarize", + "militarised": "militarized", + "militarises": "militarizes", + "militarising": "militarizing", + "milligramme": "milligram", + "milligrammes": "milligrams", + "millilitre": "milliliter", + "millilitres": "milliliters", + "millimetre": "millimeter", + "millimetres": "millimeters", + "miniaturisation": "miniaturization", + "miniaturise": "miniaturize", + "miniaturised": "miniaturized", + "miniaturises": "miniaturizes", + "miniaturising": "miniaturizing", + "minibusses": "minibuses", + "minimise": "minimize", + "minimised": "minimized", + "minimises": "minimizes", + "minimising": "minimizing", + "misbehaviour": "misbehavior", + "misdemeanour": "misdemeanor", + "misdemeanours": "misdemeanors", + "misspelt": "misspelled", + "mitre": "miter", + "mitres": "miters", + "mm": "hmm", + "mmm": "hmm", + "mobilisation": "mobilization", + "mobilise": "mobilize", + "mobilised": "mobilized", + "mobilises": "mobilizes", + "mobilising": "mobilizing", + "modelled": "modeled", + "modeller": "modeler", + "modellers": "modelers", + "modelling": "modeling", + "modernise": "modernize", + "modernised": "modernized", + "modernises": "modernizes", + "modernising": "modernizing", + "moisturise": "moisturize", + "moisturised": "moisturized", + "moisturiser": "moisturizer", + "moisturisers": "moisturizers", + "moisturises": "moisturizes", + "moisturising": "moisturizing", + "monologue": "monolog", + "monologues": "monologs", + "monopolisation": 
"monopolization", + "monopolise": "monopolize", + "monopolised": "monopolized", + "monopolises": "monopolizes", + "monopolising": "monopolizing", + "moralise": "moralize", + "moralised": "moralized", + "moralises": "moralizes", + "moralising": "moralizing", + "motorised": "motorized", + "mould": "mold", + "moulded": "molded", + "moulder": "molder", + "mouldered": "moldered", + "mouldering": "moldering", + "moulders": "molders", + "mouldier": "moldier", + "mouldiest": "moldiest", + "moulding": "molding", + "mouldings": "moldings", + "moulds": "molds", + "mouldy": "moldy", + "moult": "molt", + "moulted": "molted", + "moulting": "molting", + "moults": "molts", + "moustache": "mustache", + "moustached": "mustached", + "moustaches": "mustaches", + "moustachioed": "mustachioed", + "multicoloured": "multicolored", + "nationalisation": "nationalization", + "nationalisations": "nationalizations", + "nationalise": "nationalize", + "nationalised": "nationalized", + "nationalises": "nationalizes", + "nationalising": "nationalizing", + "naturalisation": "naturalization", + "naturalise": "naturalize", + "naturalised": "naturalized", + "naturalises": "naturalizes", + "naturalising": "naturalizing", + "neighbour": "neighbor", + "neighbourhood": "neighborhood", + "neighbourhoods": "neighborhoods", + "neighbouring": "neighboring", + "neighbourliness": "neighborliness", + "neighbourly": "neighborly", + "neighbours": "neighbors", + "neutralisation": "neutralization", + "neutralise": "neutralize", + "neutralised": "neutralized", + "neutralises": "neutralizes", + "neutralising": "neutralizing", + "normalisation": "normalization", + "normalise": "normalize", + "normalised": "normalized", + "normalises": "normalizes", + "normalising": "normalizing", + "odour": "odor", + "odourless": "odorless", + "odours": "odors", + "oesophagus": "esophagus", + "oesophaguses": "esophaguses", + "oestrogen": "estrogen", + "offence": "offense", + "offences": "offenses", + "omelette": "omelet", + 
"omelettes": "omelets", + "optimise": "optimize", + "optimised": "optimized", + "optimises": "optimizes", + "optimising": "optimizing", + "organisation": "organization", + "organisational": "organizational", + "organisations": "organizations", + "organise": "organize", + "organised": "organized", + "organiser": "organizer", + "organisers": "organizers", + "organises": "organizes", + "organising": "organizing", + "orthopaedic": "orthopedic", + "orthopaedics": "orthopedics", + "ostracise": "ostracize", + "ostracised": "ostracized", + "ostracises": "ostracizes", + "ostracising": "ostracizing", + "outmanoeuvre": "outmaneuver", + "outmanoeuvred": "outmaneuvered", + "outmanoeuvres": "outmaneuvers", + "outmanoeuvring": "outmaneuvering", + "overemphasise": "overemphasize", + "overemphasised": "overemphasized", + "overemphasises": "overemphasizes", + "overemphasising": "overemphasizing", + "oxidisation": "oxidization", + "oxidise": "oxidize", + "oxidised": "oxidized", + "oxidises": "oxidizes", + "oxidising": "oxidizing", + "paederast": "pederast", + "paederasts": "pederasts", + "paediatric": "pediatric", + "paediatrician": "pediatrician", + "paediatricians": "pediatricians", + "paediatrics": "pediatrics", + "paedophile": "pedophile", + "paedophiles": "pedophiles", + "paedophilia": "pedophilia", + "palaeolithic": "paleolithic", + "palaeontologist": "paleontologist", + "palaeontologists": "paleontologists", + "palaeontology": "paleontology", + "panelled": "paneled", + "panelling": "paneling", + "panellist": "panelist", + "panellists": "panelists", + "paralyse": "paralyze", + "paralysed": "paralyzed", + "paralyses": "paralyzes", + "paralysing": "paralyzing", + "parcelled": "parceled", + "parcelling": "parceling", + "parlour": "parlor", + "parlours": "parlors", + "particularise": "particularize", + "particularised": "particularized", + "particularises": "particularizes", + "particularising": "particularizing", + "passivisation": "passivization", + "passivise": "passivize", + 
"passivised": "passivized", + "passivises": "passivizes", + "passivising": "passivizing", + "pasteurisation": "pasteurization", + "pasteurise": "pasteurize", + "pasteurised": "pasteurized", + "pasteurises": "pasteurizes", + "pasteurising": "pasteurizing", + "patronise": "patronize", + "patronised": "patronized", + "patronises": "patronizes", + "patronising": "patronizing", + "patronisingly": "patronizingly", + "pedalled": "pedaled", + "pedalling": "pedaling", + "pedestrianisation": "pedestrianization", + "pedestrianise": "pedestrianize", + "pedestrianised": "pedestrianized", + "pedestrianises": "pedestrianizes", + "pedestrianising": "pedestrianizing", + "penalise": "penalize", + "penalised": "penalized", + "penalises": "penalizes", + "penalising": "penalizing", + "pencilled": "penciled", + "pencilling": "penciling", + "personalise": "personalize", + "personalised": "personalized", + "personalises": "personalizes", + "personalising": "personalizing", + "pharmacopoeia": "pharmacopeia", + "pharmacopoeias": "pharmacopeias", + "philosophise": "philosophize", + "philosophised": "philosophized", + "philosophises": "philosophizes", + "philosophising": "philosophizing", + "philtre": "filter", + "philtres": "filters", + "phoney": "phony", + "plagiarise": "plagiarize", + "plagiarised": "plagiarized", + "plagiarises": "plagiarizes", + "plagiarising": "plagiarizing", + "plough": "plow", + "ploughed": "plowed", + "ploughing": "plowing", + "ploughman": "plowman", + "ploughmen": "plowmen", + "ploughs": "plows", + "ploughshare": "plowshare", + "ploughshares": "plowshares", + "polarisation": "polarization", + "polarise": "polarize", + "polarised": "polarized", + "polarises": "polarizes", + "polarising": "polarizing", + "politicisation": "politicization", + "politicise": "politicize", + "politicised": "politicized", + "politicises": "politicizes", + "politicising": "politicizing", + "popularisation": "popularization", + "popularise": "popularize", + "popularised": "popularized", + 
"popularises": "popularizes", + "popularising": "popularizing", + "pouffe": "pouf", + "pouffes": "poufs", + "practise": "practice", + "practised": "practiced", + "practises": "practices", + "practising": "practicing", + "praesidium": "presidium", + "praesidiums": "presidiums", + "pressurisation": "pressurization", + "pressurise": "pressurize", + "pressurised": "pressurized", + "pressurises": "pressurizes", + "pressurising": "pressurizing", + "pretence": "pretense", + "pretences": "pretenses", + "primaeval": "primeval", + "prioritisation": "prioritization", + "prioritise": "prioritize", + "prioritised": "prioritized", + "prioritises": "prioritizes", + "prioritising": "prioritizing", + "privatisation": "privatization", + "privatisations": "privatizations", + "privatise": "privatize", + "privatised": "privatized", + "privatises": "privatizes", + "privatising": "privatizing", + "professionalisation": "professionalization", + "professionalise": "professionalize", + "professionalised": "professionalized", + "professionalises": "professionalizes", + "professionalising": "professionalizing", + "programme": "program", + "programmes": "programs", + "prologue": "prolog", + "prologues": "prologs", + "propagandise": "propagandize", + "propagandised": "propagandized", + "propagandises": "propagandizes", + "propagandising": "propagandizing", + "proselytise": "proselytize", + "proselytised": "proselytized", + "proselytiser": "proselytizer", + "proselytisers": "proselytizers", + "proselytises": "proselytizes", + "proselytising": "proselytizing", + "psychoanalyse": "psychoanalyze", + "psychoanalysed": "psychoanalyzed", + "psychoanalyses": "psychoanalyzes", + "psychoanalysing": "psychoanalyzing", + "publicise": "publicize", + "publicised": "publicized", + "publicises": "publicizes", + "publicising": "publicizing", + "pulverisation": "pulverization", + "pulverise": "pulverize", + "pulverised": "pulverized", + "pulverises": "pulverizes", + "pulverising": "pulverizing", + "pummelled": 
"pummel", + "pummelling": "pummeled", + "pyjama": "pajama", + "pyjamas": "pajamas", + "pzazz": "pizzazz", + "quarrelled": "quarreled", + "quarrelling": "quarreling", + "radicalise": "radicalize", + "radicalised": "radicalized", + "radicalises": "radicalizes", + "radicalising": "radicalizing", + "rancour": "rancor", + "randomise": "randomize", + "randomised": "randomized", + "randomises": "randomizes", + "randomising": "randomizing", + "rationalisation": "rationalization", + "rationalisations": "rationalizations", + "rationalise": "rationalize", + "rationalised": "rationalized", + "rationalises": "rationalizes", + "rationalising": "rationalizing", + "ravelled": "raveled", + "ravelling": "raveling", + "realisable": "realizable", + "realisation": "realization", + "realisations": "realizations", + "realise": "realize", + "realised": "realized", + "realises": "realizes", + "realising": "realizing", + "recognisable": "recognizable", + "recognisably": "recognizably", + "recognisance": "recognizance", + "recognise": "recognize", + "recognised": "recognized", + "recognises": "recognizes", + "recognising": "recognizing", + "reconnoitre": "reconnoiter", + "reconnoitred": "reconnoitered", + "reconnoitres": "reconnoiters", + "reconnoitring": "reconnoitering", + "refuelled": "refueled", + "refuelling": "refueling", + "regularisation": "regularization", + "regularise": "regularize", + "regularised": "regularized", + "regularises": "regularizes", + "regularising": "regularizing", + "remodelled": "remodeled", + "remodelling": "remodeling", + "remould": "remold", + "remoulded": "remolded", + "remoulding": "remolding", + "remoulds": "remolds", + "reorganisation": "reorganization", + "reorganisations": "reorganizations", + "reorganise": "reorganize", + "reorganised": "reorganized", + "reorganises": "reorganizes", + "reorganising": "reorganizing", + "revelled": "reveled", + "reveller": "reveler", + "revellers": "revelers", + "revelling": "reveling", + "revitalise": "revitalize", + 
"revitalised": "revitalized", + "revitalises": "revitalizes", + "revitalising": "revitalizing", + "revolutionise": "revolutionize", + "revolutionised": "revolutionized", + "revolutionises": "revolutionizes", + "revolutionising": "revolutionizing", + "rhapsodise": "rhapsodize", + "rhapsodised": "rhapsodized", + "rhapsodises": "rhapsodizes", + "rhapsodising": "rhapsodizing", + "rigour": "rigor", + "rigours": "rigors", + "ritualised": "ritualized", + "rivalled": "rivaled", + "rivalling": "rivaling", + "romanticise": "romanticize", + "romanticised": "romanticized", + "romanticises": "romanticizes", + "romanticising": "romanticizing", + "rumour": "rumor", + "rumoured": "rumored", + "rumours": "rumors", + "sabre": "saber", + "sabres": "sabers", + "saltpetre": "saltpeter", + "sanitise": "sanitize", + "sanitised": "sanitized", + "sanitises": "sanitizes", + "sanitising": "sanitizing", + "satirise": "satirize", + "satirised": "satirized", + "satirises": "satirizes", + "satirising": "satirizing", + "saviour": "savior", + "saviours": "saviors", + "savour": "savor", + "savoured": "savored", + "savouries": "savories", + "savouring": "savoring", + "savours": "savors", + "savoury": "savory", + "scandalise": "scandalize", + "scandalised": "scandalized", + "scandalises": "scandalizes", + "scandalising": "scandalizing", + "sceptic": "skeptic", + "sceptical": "skeptical", + "sceptically": "skeptically", + "scepticism": "skepticism", + "sceptics": "skeptics", + "sceptre": "scepter", + "sceptres": "scepters", + "scrutinise": "scrutinize", + "scrutinised": "scrutinized", + "scrutinises": "scrutinizes", + "scrutinising": "scrutinizing", + "secularisation": "secularization", + "secularise": "secularize", + "secularised": "secularized", + "secularises": "secularizes", + "secularising": "secularizing", + "sensationalise": "sensationalize", + "sensationalised": "sensationalized", + "sensationalises": "sensationalizes", + "sensationalising": "sensationalizing", + "sensitise": "sensitize", + 
"sensitised": "sensitized", + "sensitises": "sensitizes", + "sensitising": "sensitizing", + "sentimentalise": "sentimentalize", + "sentimentalised": "sentimentalized", + "sentimentalises": "sentimentalizes", + "sentimentalising": "sentimentalizing", + "sepulchre": "sepulcher", + "sepulchres": "sepulchers", + "serialisation": "serialization", + "serialisations": "serializations", + "serialise": "serialize", + "serialised": "serialized", + "serialises": "serializes", + "serialising": "serializing", + "sermonise": "sermonize", + "sermonised": "sermonized", + "sermonises": "sermonizes", + "sermonising": "sermonizing", + "sheikh": "sheik", + "shovelled": "shoveled", + "shovelling": "shoveling", + "shrivelled": "shriveled", + "shrivelling": "shriveling", + "signalise": "signalize", + "signalised": "signalized", + "signalises": "signalizes", + "signalising": "signalizing", + "signalled": "signaled", + "signalling": "signaling", + "smoulder": "smolder", + "smouldered": "smoldered", + "smouldering": "smoldering", + "smoulders": "smolders", + "snivelled": "sniveled", + "snivelling": "sniveling", + "snorkelled": "snorkeled", + "snorkelling": "snorkeling", + "snowplough": "snowplow", + "snowploughs": "snowplow", + "socialisation": "socialization", + "socialise": "socialize", + "socialised": "socialized", + "socialises": "socializes", + "socialising": "socializing", + "sodomise": "sodomize", + "sodomised": "sodomized", + "sodomises": "sodomizes", + "sodomising": "sodomizing", + "solemnise": "solemnize", + "solemnised": "solemnized", + "solemnises": "solemnizes", + "solemnising": "solemnizing", + "sombre": "somber", + "specialisation": "specialization", + "specialisations": "specializations", + "specialise": "specialize", + "specialised": "specialized", + "specialises": "specializes", + "specialising": "specializing", + "spectre": "specter", + "spectres": "specters", + "spiralled": "spiraled", + "spiralling": "spiraling", + "splendour": "splendor", + "splendours": "splendors", + 
"squirrelled": "squirreled", + "squirrelling": "squirreling", + "stabilisation": "stabilization", + "stabilise": "stabilize", + "stabilised": "stabilized", + "stabiliser": "stabilizer", + "stabilisers": "stabilizers", + "stabilises": "stabilizes", + "stabilising": "stabilizing", + "standardisation": "standardization", + "standardise": "standardize", + "standardised": "standardized", + "standardises": "standardizes", + "standardising": "standardizing", + "stencilled": "stenciled", + "stencilling": "stenciling", + "sterilisation": "sterilization", + "sterilisations": "sterilizations", + "sterilise": "sterilize", + "sterilised": "sterilized", + "steriliser": "sterilizer", + "sterilisers": "sterilizers", + "sterilises": "sterilizes", + "sterilising": "sterilizing", + "stigmatisation": "stigmatization", + "stigmatise": "stigmatize", + "stigmatised": "stigmatized", + "stigmatises": "stigmatizes", + "stigmatising": "stigmatizing", + "storey": "story", + "storeys": "stories", + "subsidisation": "subsidization", + "subsidise": "subsidize", + "subsidised": "subsidized", + "subsidiser": "subsidizer", + "subsidisers": "subsidizers", + "subsidises": "subsidizes", + "subsidising": "subsidizing", + "succour": "succor", + "succoured": "succored", + "succouring": "succoring", + "succours": "succors", + "sulphate": "sulfate", + "sulphates": "sulfates", + "sulphide": "sulfide", + "sulphides": "sulfides", + "sulphur": "sulfur", + "sulphurous": "sulfurous", + "summarise": "summarize", + "summarised": "summarized", + "summarises": "summarizes", + "summarising": "summarizing", + "swivelled": "swiveled", + "swivelling": "swiveling", + "symbolise": "symbolize", + "symbolised": "symbolized", + "symbolises": "symbolizes", + "symbolising": "symbolizing", + "sympathise": "sympathize", + "sympathised": "sympathized", + "sympathiser": "sympathizer", + "sympathisers": "sympathizers", + "sympathises": "sympathizes", + "sympathising": "sympathizing", + "synchronisation": "synchronization", + 
"synchronise": "synchronize", + "synchronised": "synchronized", + "synchronises": "synchronizes", + "synchronising": "synchronizing", + "synthesise": "synthesize", + "synthesised": "synthesized", + "synthesiser": "synthesizer", + "synthesisers": "synthesizers", + "synthesises": "synthesizes", + "synthesising": "synthesizing", + "syphon": "siphon", + "syphoned": "siphoned", + "syphoning": "siphoning", + "syphons": "siphons", + "systematisation": "systematization", + "systematise": "systematize", + "systematised": "systematized", + "systematises": "systematizes", + "systematising": "systematizing", + "tantalise": "tantalize", + "tantalised": "tantalized", + "tantalises": "tantalizes", + "tantalising": "tantalizing", + "tantalisingly": "tantalizingly", + "tasselled": "tasseled", + "technicolour": "technicolor", + "temporise": "temporize", + "temporised": "temporized", + "temporises": "temporizes", + "temporising": "temporizing", + "tenderise": "tenderize", + "tenderised": "tenderized", + "tenderises": "tenderizes", + "tenderising": "tenderizing", + "terrorise": "terrorize", + "terrorised": "terrorized", + "terrorises": "terrorizes", + "terrorising": "terrorizing", + "theatre": "theater", + "theatregoer": "theatergoer", + "theatregoers": "theatergoers", + "theatres": "theaters", + "theorise": "theorize", + "theorised": "theorized", + "theorises": "theorizes", + "theorising": "theorizing", + "tonne": "ton", + "tonnes": "tons", + "towelled": "toweled", + "towelling": "toweling", + "toxaemia": "toxemia", + "tranquillise": "tranquilize", + "tranquillised": "tranquilized", + "tranquilliser": "tranquilizer", + "tranquillisers": "tranquilizers", + "tranquillises": "tranquilizes", + "tranquillising": "tranquilizing", + "tranquillity": "tranquility", + "tranquillize": "tranquilize", + "tranquillized": "tranquilized", + "tranquillizer": "tranquilizer", + "tranquillizers": "tranquilizers", + "tranquillizes": "tranquilizes", + "tranquillizing": "tranquilizing", + "tranquilly": 
"tranquility", + "transistorised": "transistorized", + "traumatise": "traumatize", + "traumatised": "traumatized", + "traumatises": "traumatizes", + "traumatising": "traumatizing", + "travelled": "traveled", + "traveller": "traveler", + "travellers": "travelers", + "travelling": "traveling", + "travelog": "travelogue", + "travelogs": "travelogues", + "trialled": "trialed", + "trialling": "trialing", + "tricolour": "tricolor", + "tricolours": "tricolors", + "trivialise": "trivialize", + "trivialised": "trivialized", + "trivialises": "trivializes", + "trivialising": "trivializing", + "tumour": "tumor", + "tumours": "tumors", + "tunnelled": "tunneled", + "tunnelling": "tunneling", + "tyrannise": "tyrannize", + "tyrannised": "tyrannized", + "tyrannises": "tyrannizes", + "tyrannising": "tyrannizing", + "tyre": "tire", + "tyres": "tires", + "unauthorised": "unauthorized", + "uncivilised": "uncivilized", + "underutilised": "underutilized", + "unequalled": "unequaled", + "unfavourable": "unfavorable", + "unfavourably": "unfavorably", + "unionisation": "unionization", + "unionise": "unionize", + "unionised": "unionized", + "unionises": "unionizes", + "unionising": "unionizing", + "unorganised": "unorganized", + "unravelled": "unraveled", + "unravelling": "unraveling", + "unrecognisable": "unrecognizable", + "unrecognised": "unrecognized", + "unrivalled": "unrivaled", + "unsavoury": "unsavory", + "untrammelled": "untrammeled", + "urbanisation": "urbanization", + "urbanise": "urbanize", + "urbanised": "urbanized", + "urbanises": "urbanizes", + "urbanising": "urbanizing", + "utilisable": "utilizable", + "utilisation": "utilization", + "utilise": "utilize", + "utilised": "utilized", + "utilises": "utilizes", + "utilising": "utilizing", + "valour": "valor", + "vandalise": "vandalize", + "vandalised": "vandalized", + "vandalises": "vandalizes", + "vandalising": "vandalizing", + "vaporisation": "vaporization", + "vaporise": "vaporize", + "vaporised": "vaporized", + "vaporises": 
"vaporizes", + "vaporising": "vaporizing", + "vapour": "vapor", + "vapours": "vapors", + "verbalise": "verbalize", + "verbalised": "verbalized", + "verbalises": "verbalizes", + "verbalising": "verbalizing", + "victimisation": "victimization", + "victimise": "victimize", + "victimised": "victimized", + "victimises": "victimizes", + "victimising": "victimizing", + "videodisc": "videodisk", + "videodiscs": "videodisks", + "vigour": "vigor", + "visualisation": "visualization", + "visualisations": "visualizations", + "visualise": "visualize", + "visualised": "visualized", + "visualises": "visualizes", + "visualising": "visualizing", + "vocalisation": "vocalization", + "vocalisations": "vocalizations", + "vocalise": "vocalize", + "vocalised": "vocalized", + "vocalises": "vocalizes", + "vocalising": "vocalizing", + "vulcanised": "vulcanized", + "vulgarisation": "vulgarization", + "vulgarise": "vulgarize", + "vulgarised": "vulgarized", + "vulgarises": "vulgarizes", + "vulgarising": "vulgarizing", + "waggon": "wagon", + "waggons": "wagons", + "watercolour": "watercolor", + "watercolours": "watercolors", + "weaselled": "weaseled", + "weaselling": "weaseling", + "westernisation": "westernization", + "westernise": "westernize", + "westernised": "westernized", + "westernises": "westernizes", + "westernising": "westernizing", + "womanise": "womanize", + "womanised": "womanized", + "womaniser": "womanizer", + "womanisers": "womanizers", + "womanises": "womanizes", + "womanising": "womanizing", + "woollen": "woolen", + "woollens": "woolens", + "woollies": "woolies", + "woolly": "wooly", + "worshipped": "worshiped", + "worshipper": "worshiper", + "worshipping": "worshiping", + "yodelled": "yodeled", + "yodelling": "yodeling", + "yoghourt": "yogurt", + "yoghourts": "yogurts", + "yoghurt": "yogurt", + "yoghurts": "yogurts" +] diff --git a/Tests/WhisperKitTests/Evaluate/WERUtils.swift b/Tests/WhisperKitTests/Evaluate/WERUtils.swift new file mode 100644 index 0000000..1885a78 --- 
import Foundation

/// One step of the edit script that turns a reference sequence into a hypothesis sequence.
enum EditOp: UInt8 {
    /// Placeholder that contributes no edit; `WERUtils.processWords` skips it.
    case blank
    /// A reference element was substituted by a different hypothesis element.
    case replace
    /// A reference element is missing from the hypothesis.
    case delete
    /// The hypothesis contains an element that is not in the reference.
    case insert
}

/// Word-error-rate (WER) helpers.
class WERUtils {
    /// First value of the UTF-16 surrogate block; values in it cannot form a `Unicode.Scalar`.
    private static let surrogateBlockStart = 0xD800
    /// Number of values in the surrogate block (0xD800...0xDFFF).
    private static let surrogateBlockLength = 0x800

    /// Maps a vocabulary index to a distinct Unicode scalar.
    ///
    /// Indices at or above the surrogate block are shifted past it; without the shift
    /// `UnicodeScalar(_:)` returns `nil` for 0xD800...0xDFFF and the word would be dropped
    /// from the encoded sentence. Indices below 0xD800 map exactly as before.
    private static func scalar(forWordIndex index: Int) -> Unicode.Scalar? {
        let value = index < surrogateBlockStart ? index : index + surrogateBlockLength
        return UnicodeScalar(value)
    }

    /// Encodes every distinct word as a single character so that word-level alignment can run
    /// on plain scalar sequences.
    ///
    /// - Parameters:
    ///   - reference: Reference sentences, each already split into words.
    ///   - hypothesis: Hypothesis sentences, each already split into words.
    /// - Returns: One encoded string per reference sentence and one per hypothesis sentence.
    ///   The same word always maps to the same character within a single call.
    static func wordsToChars(reference: [[String]], hypothesis: [[String]]) -> ([String], [String]) {
        // Tokenize each word into an integer shared by both sides.
        let vocabulary = Set((reference + hypothesis).flatMap { $0 })
        let word2char = Dictionary(uniqueKeysWithValues: vocabulary.enumerated().map { index, word in
            (word, index)
        })

        func encode(_ sentences: [[String]]) -> [String] {
            sentences.map { sentence in
                String(sentence.lazy.compactMap { word -> Character? in
                    guard let index = word2char[word],
                          let codePoint = WERUtils.scalar(forWordIndex: index)
                    else { return nil }
                    return Character(codePoint)
                })
            }
        }

        return (encode(reference), encode(hypothesis))
    }

    /// Collapses repeated spaces, strips the sentences and splits them into words.
    private static func tokenize(_ sentences: [String]) -> [[String]] {
        var transformed = NormalizationUtils.removeMultipleSpaces(sentences: sentences)
        transformed = NormalizationUtils.strip(sentences: transformed)
        return NormalizationUtils.reduceToListOfListOfWords(sentences: transformed)
    }

    /// Computes the word error rate `(S + D + I) / (H + S + D)` over paired sentences.
    ///
    /// - Parameters:
    ///   - reference: Reference sentences.
    ///   - hypothesis: Hypothesis sentences, paired with `reference` by position.
    /// - Returns: The word error rate. When the reference contains no words the result is `0`
    ///   if the hypothesis is empty as well (previously NaN) and `+infinity` otherwise.
    static func processWords(reference: [String], hypothesis: [String]) -> Double {
        let (refAsChars, hypAsChars) = WERUtils.wordsToChars(
            reference: tokenize(reference),
            hypothesis: tokenize(hypothesis)
        )
        let refArrays = refAsChars.map { Array($0.unicodeScalars) }
        let hypArrays = hypAsChars.map { Array($0.unicodeScalars) }

        var substitutions = 0
        var deletions = 0
        var insertions = 0
        var referenceWordCount = 0

        for (referenceSentence, hypothesisSentence) in zip(refArrays, hypArrays) {
            // Edit operations that transform the reference into the hypothesis.
            for op in hirschberg(referenceSentence, hypothesisSentence) {
                switch op {
                case .replace: substitutions += 1
                case .delete: deletions += 1
                case .insert: insertions += 1
                case .blank: break
                }
            }
            referenceWordCount += referenceSentence.count
        }

        // Every reference word is either a hit, a substitution or a deletion.
        let hits = referenceWordCount - (substitutions + deletions)
        let errors = substitutions + deletions + insertions
        let denominator = hits + substitutions + deletions

        guard denominator > 0 else {
            // Avoid 0/0 = NaN for two empty transcripts; keep +infinity for pure insertions.
            return errors == 0 ? 0 : Double.infinity
        }
        return Double(errors) / Double(denominator)
    }

    /// Normalizes the transcripts and returns their word error rate.
    ///
    /// - Parameters:
    ///   - originalTranscript: Reference text.
    ///   - generatedTranscript: Hypothesis text; always normalized.
    ///   - normalizeOriginal: When `true`, the reference is normalized too.
    /// - Returns: The result of `processWords` for the single transcript pair.
    static func evaluate(originalTranscript: String, generatedTranscript: String, normalizeOriginal: Bool = false) -> Double {
        let normalizer = EnglishTextNormalizer()
        let reference = normalizeOriginal ? normalizer.normalize(text: originalTranscript) : originalTranscript
        let hypothesis = normalizer.normalize(text: generatedTranscript)

        return WERUtils.processWords(reference: [reference], hypothesis: [hypothesis])
    }
}
// MARK: StaticAttributes

/// Attributes captured once when the object is created: OS version, low-power state and the
/// compute units used by the encoder and decoder.
class StaticAttributes: Codable {
    /// "major.minor.patch" of the running operating system.
    let osVersion: String
    /// "Enabled" or "Disabled".
    let isLowPowerMode: String
    /// Human-readable compute units of the audio encoder.
    let encoderCompute: String
    /// Human-readable compute units of the text decoder.
    let decoderCompute: String

    /// - Parameters:
    ///   - encoderCompute: Compute units of the audio encoder.
    ///   - decoderCompute: Compute units of the text decoder.
    init(encoderCompute: MLComputeUnits, decoderCompute: MLComputeUnits) {
        let processInfo = ProcessInfo.processInfo
        let os = processInfo.operatingSystemVersion

        osVersion = [os.majorVersion, os.minorVersion, os.patchVersion]
            .map { String($0) }
            .joined(separator: ".")

        if processInfo.isLowPowerModeEnabled {
            isLowPowerMode = "Enabled"
        } else {
            isLowPowerMode = "Disabled"
        }

        self.encoderCompute = encoderCompute.stringValue
        self.decoderCompute = decoderCompute.stringValue
    }
}

/// Parallel series of system samples; the arrays are stored exactly as passed in.
class SystemMeasurements: Codable {
    let systemMemory: [SystemMemoryUsage]
    let diskSpace: [DiskSpace]
    let batteryLevel: [Float]
    let timeElapsed: [TimeInterval]

    init(
        systemMemory: [SystemMemoryUsage],
        diskSpace: [DiskSpace],
        batteryLevel: [Float],
        timeElapsed: [TimeInterval]
    ) {
        self.systemMemory = systemMemory
        self.diskSpace = diskSpace
        self.batteryLevel = batteryLevel
        self.timeElapsed = timeElapsed
    }
}
/// Samples system-wide and per-process memory figures through the Mach host and task APIs.
@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
class SystemMemoryCheckerAdvanced: NSObject {
    /// Divisor that converts a byte count to GB (1024³); a power of two, so the division is exact.
    private static let bytesPerGB: Float = 1024 * 1024 * 1024

    /// All-zero value returned whenever a Mach call fails.
    private static let unavailable = SystemMemoryUsage(
        totalAvailableGB: 0, totalUsedGB: 0, appAllocatedGB: 0, appUsedGB: 0, swapUsedGB: 0
    )

    /// Returns a snapshot of system and app memory usage, or an all-zero value when
    /// `host_statistics64` or `task_info` does not succeed.
    static func getMemoryUsage() -> SystemMemoryUsage {
        // System-wide page counters via host_statistics64.
        var stats = vm_statistics64()
        var count = mach_msg_type_number_t(MemoryLayout.size(ofValue: stats) / MemoryLayout<integer_t>.size)
        let hostPort = mach_host_self()
        let hostResult = withUnsafeMutablePointer(to: &stats) { statsPtr -> kern_return_t in
            statsPtr.withMemoryRebound(to: integer_t.self, capacity: Int(count)) { intPtr in
                host_statistics64(hostPort, HOST_VM_INFO64, intPtr, &count)
            }
        }
        guard hostResult == KERN_SUCCESS else { return unavailable }

        let pageSize = UInt64(vm_kernel_page_size)
        func gigabytes(pages: natural_t) -> Float {
            Float(pages) * Float(pageSize) / bytesPerGB
        }

        let totalMemory = Float(ProcessInfo.processInfo.physicalMemory) / bytesPerGB
        // "Available" here means free plus inactive pages.
        let availableMemory = gigabytes(pages: stats.free_count) + gigabytes(pages: stats.inactive_count)
        let usedMemory = totalMemory - availableMemory

        // Per-process footprint via task_info. The TASK_VM_INFO_COUNT and TASK_VM_INFO_REV1_COUNT
        // macros are not imported into Swift, so they are recomputed from the struct layout.
        let taskVMInfoCount = mach_msg_type_number_t(
            MemoryLayout<task_vm_info_data_t>.size / MemoryLayout<integer_t>.size
        )
        guard let offset = MemoryLayout<task_vm_info_data_t>.offset(of: \task_vm_info_data_t.min_address) else {
            return unavailable
        }
        let taskVMInfoRev1Count = mach_msg_type_number_t(offset / MemoryLayout<integer_t>.size)

        var info = task_vm_info_data_t()
        var infoCount = taskVMInfoCount
        let taskResult = withUnsafeMutablePointer(to: &info) { infoPtr in
            infoPtr.withMemoryRebound(to: integer_t.self, capacity: Int(infoCount)) { intPtr in
                task_info(mach_task_self_, task_flavor_t(TASK_VM_INFO), intPtr, &infoCount)
            }
        }
        guard taskResult == KERN_SUCCESS, infoCount >= taskVMInfoRev1Count else { return unavailable }

        let appAllocatedGB = Float(UInt64(info.phys_footprint)) / bytesPerGB
        let appUsedGB = Float(UInt64(info.resident_size)) / bytesPerGB

        // NOTE(review): `swapouts` looks like a cumulative page counter rather than the amount of
        // swap currently in use — confirm this is the intended metric.
        let swapUsedGB = Float(UInt64(stats.swapouts) * pageSize) / bytesPerGB

        return SystemMemoryUsage(
            totalAvailableGB: availableMemory,
            totalUsedGB: usedMemory,
            appAllocatedGB: appAllocatedGB,
            appUsedGB: appUsedGB,
            swapUsedGB: swapUsedGB
        )
    }
}

/// Reads the current battery charge as a percentage.
class BatteryLevelChecker: NSObject {
    /// Returns the battery level in percent (0...100), or `nil` when it cannot be determined.
    ///
    /// On iOS, visionOS and watchOS battery monitoring is switched on for the read and then
    /// restored to its previous state.
    static func getBatteryLevel() -> Float? {
        #if os(iOS) || os(visionOS)
        // NOTE(review): UIKit is generally main-thread-only — confirm the calling queue.
        let device = UIDevice.current
        let wasMonitoring = device.isBatteryMonitoringEnabled
        device.isBatteryMonitoringEnabled = true
        defer { device.isBatteryMonitoringEnabled = wasMonitoring }
        let level = device.batteryLevel
        return level >= 0 ? level * 100 : nil
        #elseif os(watchOS)
        // `batteryLevel` reports -1 unless monitoring is enabled, so enable it for the read.
        let device = WKInterfaceDevice.current()
        let wasMonitoring = device.isBatteryMonitoringEnabled
        device.isBatteryMonitoringEnabled = true
        defer { device.isBatteryMonitoringEnabled = wasMonitoring }
        let level = device.batteryLevel
        return level >= 0 ? level * 100 : nil
        #elseif os(macOS)
        return getMacOSBatteryLevel()
        #else
        return nil
        #endif
    }

    #if os(macOS)
    /// Returns the charge of the first power source that reports both a current and a positive
    /// maximum capacity, or `nil` when there is none.
    private static func getMacOSBatteryLevel() -> Float? {
        guard let snapshot = IOPSCopyPowerSourcesInfo()?.takeRetainedValue(),
              let sourceList = IOPSCopyPowerSourcesList(snapshot)?.takeRetainedValue()
        else { return nil }

        let sources = sourceList as [CFTypeRef]
        for source in sources {
            guard let description = IOPSGetPowerSourceDescription(snapshot, source)?
                    .takeUnretainedValue() as? [String: Any],
                  let currentCapacity = description[kIOPSCurrentCapacityKey] as? Int,
                  let maxCapacity = description[kIOPSMaxCapacityKey] as? Int,
                  maxCapacity > 0 // avoid dividing by zero
            else { continue }
            return (Float(currentCapacity) / Float(maxCapacity)) * 100
        }
        return nil
    }
    #endif
}

/// Capacity and free space of a file system, in GB; `nil` when unavailable.
struct DiskSpace: Codable {
    let totalSpaceGB: Float?
    let freeSpaceGB: Float?
}

/// One memory sample produced by `SystemMemoryCheckerAdvanced.getMemoryUsage()`, in GB.
struct SystemMemoryUsage: Codable {
    /// Free plus inactive pages.
    let totalAvailableGB: Float
    /// Physical memory minus `totalAvailableGB`.
    let totalUsedGB: Float
    /// The task's `phys_footprint`.
    let appAllocatedGB: Float
    /// The task's `resident_size`.
    let appUsedGB: Float
    /// `swapouts` pages multiplied by the page size.
    let swapUsedGB: Float
}

/// Reads total and free space of the file system that holds the home directory.
class DiskSpaceChecker: NSObject {
    /// Returns the disk space of the home-directory volume; both fields are `nil` on
    /// unsupported platforms or when the attributes cannot be read.
    static func getDiskSpace() -> DiskSpace {
        #if os(iOS) || os(watchOS) || os(visionOS)
        return diskSpace(atPath: NSHomeDirectory())
        #elseif os(macOS)
        return diskSpace(atPath: FileManager.default.homeDirectoryForCurrentUser.path)
        #else
        return DiskSpace(totalSpaceGB: nil, freeSpaceGB: nil)
        #endif
    }

    /// Shared reader for every platform; logs and returns `nil` fields on failure.
    private static func diskSpace(atPath path: String) -> DiskSpace {
        let bytesPerGB: Float = 1024 * 1024 * 1024
        do {
            let attributes = try FileManager.default.attributesOfFileSystem(forPath: path)
            if let totalSpace = attributes[.systemSize] as? NSNumber,
               let freeSpace = attributes[.systemFreeSize] as? NSNumber {
                return DiskSpace(
                    totalSpaceGB: Float(truncating: totalSpace) / bytesPerGB,
                    freeSpaceGB: Float(truncating: freeSpace) / bytesPerGB
                )
            }
        } catch {
            print("Error retrieving file system attributes: \(error)")
        }
        return DiskSpace(totalSpaceGB: nil, freeSpaceGB: nil)
    }
}

private extension MLComputeUnits {
    /// Human-readable name of the compute-unit selection.
    var stringValue: String {
        switch self {
        case .cpuOnly:
            return "CPU Only"
        case .cpuAndGPU:
            return "CPU and GPU"
        case .all:
            return "All"
        case .cpuAndNeuralEngine:
            return "CPU and Neural Engine"
        @unknown default:
            return "Unknown"
        }
    }
}
override func setUp() { super.setUp() - - if self.audioFileURL == nil { + if self.audioFileURLs == nil || self.metadataURL == nil || self.testWERURLs == nil{ let expectation = XCTestExpectation(description: "Download test audio") downloadTestAudio { success in if success { @@ -20,19 +23,34 @@ final class RegressionTests: XCTestCase { } } // Wait for the expectation with a timeout - wait(for: [expectation], timeout: 30) + wait(for: [expectation], timeout: 300) } } - func downloadTestAudio(completion: @escaping (Bool) -> Void) { + private func downloadTestAudio(completion: @escaping (Bool) -> Void) { Task { do { let earnings22CompressedDataset = Hub.Repo(id: "argmaxinc/whisperkit-test-data", type: .datasets) let tempPath = FileManager.default.temporaryDirectory let downloadBase = tempPath.appending(component: "huggingface") let hubApi = HubApi(downloadBase: downloadBase) - let fileURL = try await hubApi.snapshot(from: earnings22CompressedDataset, matching: ["4484146.mp3"]) - self.audioFileURL = fileURL.appending(component: "4484146.mp3") + let repoURL = try await hubApi.snapshot(from: earnings22CompressedDataset, matching: ["*.mp3","*.txt"]) + + var audioFileURLs: [URL] = [] + var testWERURLs: [URL] = [] + for file in try FileManager.default.contentsOfDirectory(atPath: repoURL.path()){ + if file.hasSuffix(".mp3"){ + audioFileURLs.append(repoURL.appending(component: file)) + }else if file.hasSuffix(".txt"){ + testWERURLs.append(repoURL.appending(component: file)) + } + } + self.audioFileURLs = audioFileURLs + self.testWERURLs = testWERURLs + + let earnings22OriginalDataset = Hub.Repo(id: "argmaxinc/earnings22-12hours", type: .datasets) + let metadataURL = try await hubApi.snapshot(from: earnings22OriginalDataset, matching: ["metadata.json"]) + self.metadataURL = metadataURL.appending(component: "metadata.json") completion(true) } catch { XCTFail("Async setup failed with error: \(error)") @@ -40,74 +58,172 @@ final class RegressionTests: XCTestCase { } } } + + private 
func getTranscript(filename: String) -> String? {
    // Looks up `filename` in the metadata JSON (an array of objects with "audio" and
    // "transcription" keys) and returns the transcription of the last matching entry.
    // Returns nil when the metadata is missing, unreadable, or has no usable match.
    guard let metadataURL = self.metadataURL,
          let data = try? Data(contentsOf: metadataURL),
          let entries = (try? JSONSerialization.jsonObject(with: data, options: [])) as? [[String: Any]]
    else {
        return nil
    }

    let match = entries.last(where: { $0["audio"] as? String == filename })
    return match?["transcription"] as? String
}

/// Loads the fixture transcripts used to verify the WER computation.
///
/// - Returns: `(original, generated)` text; an element is `nil` when its file is absent or
///   unreadable. Records a test failure and returns `(nil, nil)` when no test files are set.
private func getWERTestData() -> (String?, String?) {
    let testFileURLs: [URL]
    do {
        testFileURLs = try XCTUnwrap(
            self.testWERURLs,
            "Test files for WER verification not found"
        )
    } catch {
        XCTFail("Fetching test data for WER verification failed: \(error)")
        return (nil, nil)
    }

    // The last file with a given name wins, as in a sequential scan.
    func contents(ofFileNamed name: String) -> String? {
        guard let url = testFileURLs.last(where: { $0.lastPathComponent == name }) else {
            return nil
        }
        return try? String(contentsOf: url)
    }

    let originalText = contents(ofFileNamed: "test_original_transcript.txt")
    let generatedText = contents(ofFileNamed: "test_generated_transcript.txt")
    return (originalText, generatedText)
}
SystemMemoryChecker.getMemoryUsed() - let currentTPS = result.timings.tokensPerSecond - if currentMemory != 0 { - currentMemoryValues.append(Float(currentMemory)) + let audioFilePaths = try XCTUnwrap( + self.audioFileURLs, + "Audio files not found" + ).map({$0.path()}) + + for audioFilePath in audioFilePaths{ + let startTime = Date() + + var currentAppMemoryValues = [Float]() + var currentTPSValues = [Float]() + + let memoryStats = MemoryStats( + measurements: [], units: "MB", + totalNumberOfMeasurements: 0, + preTranscribeMemory: -1, + postTranscribeMemory: -1 + ) + let latencyStats = LatencyStats( + measurements: [], units: "Tokens/Sec", + totalNumberOfMeasurements: 0 + ) + var count = 0 + var lastTimeStamp = CFAbsoluteTimeGetCurrent() + + let callback = { + (result: TranscriptionProgress) -> Bool in + count += 1 + let currentMemory = AppMemoryChecker.getMemoryUsed() + let timeTaken = CFAbsoluteTimeGetCurrent() - lastTimeStamp + lastTimeStamp = CFAbsoluteTimeGetCurrent() + let currentTPS = Double(1/timeTaken) + + if currentMemory != 0 { + currentAppMemoryValues.append(Float(currentMemory)) + } + if !currentTPS.isNaN { + currentTPSValues.append(Float(currentTPS)) + } + if count % 100 == 1 { + let timeElapsed = Date().timeIntervalSince(startTime) + memoryStats.measure(from: currentAppMemoryValues, timeElapsed: timeElapsed) + latencyStats.measure(from: currentTPSValues, timeElapsed: timeElapsed) + currentAppMemoryValues = [] + currentTPSValues = [] + } + return true } - if !currentTPS.isNaN { - currentTPSValues.append(Float(currentTPS)) + + let whisperKit = try await WhisperKit(model: model) + memoryStats.preTranscribeMemory = Float(AppMemoryChecker.getMemoryUsed()) + + var systemMemory: [SystemMemoryUsage] = [] + var diskSpace: [DiskSpace] = [] + var batteryLevel: [Float] = [] + var timerTimeElapsed: [TimeInterval] = [] + // DispatchSourceTimer to collect memory usage asynchronously + let timerQueue = DispatchQueue(label: "com.example.SystemStatTimerQueue") + let 
timer = DispatchSource.makeTimerSource(queue: timerQueue) + timer.schedule(deadline: .now(), repeating: 1.0) + timer.setEventHandler { + systemMemory.append(SystemMemoryCheckerAdvanced.getMemoryUsage()) + diskSpace.append(DiskSpaceChecker.getDiskSpace()) + batteryLevel.append(BatteryLevelChecker.getBatteryLevel() ?? -1) + timerTimeElapsed.append(Date().timeIntervalSince(startTime)) } - if count % 100 == 1 { - let timeElapsed = Date().timeIntervalSince(startTime) - memoryStats.measure(from: currentMemoryValues, timeElapsed: timeElapsed) - latencyStats.measure(from: currentTPSValues, timeElapsed: timeElapsed) - currentMemoryValues = [] - currentTPSValues = [] + timer.resume() + + let transcriptionResult = try await XCTUnwrapAsync( + await whisperKit.transcribe(audioPath: audioFilePath, callback: callback).first, + "Transcription failed" + ) + XCTAssert(transcriptionResult.text.isEmpty == false, "Transcription failed") + + memoryStats.postTranscribeMemory = Float(AppMemoryChecker.getMemoryUsed()) + + var wer = -Double.infinity + if let filename = audioFilePath.split(separator: "/").last,let originalTranscript = getTranscript(filename: String(filename)){ + wer = WERUtils.evaluate( + originalTranscript: originalTranscript, + generatedTranscript: transcriptionResult.text, + normalizeOriginal: true + ) + XCTAssert(wer != -Double.infinity, "Calculating WER failed.") + } + + let testInfo = TestInfo( + device: device, + audioFile: audioFilePath, + model: model, + date: startTime.formatted(Date.ISO8601FormatStyle().dateSeparator(.dash)), + timeElapsedInSeconds: Date().timeIntervalSince(startTime), + timings: transcriptionResult.timings, + transcript: transcriptionResult.text, + wer: wer + ) + let staticAttributes = StaticAttributes( + encoderCompute: whisperKit.modelCompute.audioEncoderCompute, + decoderCompute: whisperKit.modelCompute.textDecoderCompute + ) + let systemMeasurements = SystemMeasurements( + systemMemory: systemMemory, + diskSpace: diskSpace, + batteryLevel: 
batteryLevel, + timeElapsed: timerTimeElapsed + ) + let json = RegressionStats( + testInfo: testInfo, + memoryStats: memoryStats, + latencyStats: latencyStats, + staticAttributes: staticAttributes, + systemMeasurements: systemMeasurements + ) + resultJSON.append(json) + + if !overEntireDataset{ + break } - return true } - - let whisperKit = try await WhisperKit(model: model) - memoryStats.preTranscribeMemory = Float(SystemMemoryChecker.getMemoryUsed()) - - let transcriptionResult = try await XCTUnwrapAsync( - await whisperKit.transcribe(audioPath: audioFilePath, callback: callback).first, - "Transcription failed" - ) - XCTAssert(transcriptionResult.text.isEmpty == false, "Transcription failed") - - memoryStats.postTranscribeMemory = Float(SystemMemoryChecker.getMemoryUsed()) - let testInfo = TestInfo( - device: device, - audioFile: audioFilePath, - model: model, - date: startTime.formatted(Date.ISO8601FormatStyle().dateSeparator(.dash)), - timeElapsedInSeconds: Date().timeIntervalSince(startTime), - timings: transcriptionResult.timings, - transcript: transcriptionResult.text - ) - let json = RegressionStats(testInfo: testInfo, memoryStats: memoryStats, latencyStats: latencyStats) + do { - let attachment = try XCTAttachment(data: json.jsonData(), uniformTypeIdentifier: "json") + let jsonData = try JSONEncoder().encode(resultJSON) + let attachment = XCTAttachment(data: jsonData, uniformTypeIdentifier: UTType.json.identifier) attachment.lifetime = .keepAlways attachment.name = "\(device)_\(model)_\(iso8601DateTimeString).json" add(attachment) @@ -154,23 +270,32 @@ final class RegressionTests: XCTestCase { #if os(macOS) && arch(arm64) currentDevice = Process.processor #endif - + + //Remove trailing whitespace characters + while currentDevice.last?.isWhitespace == true { currentDevice = String(currentDevice.dropLast())} do { allModels = try await WhisperKit.fetchAvailableModels() + // TODO: Remove after testing + allModels = ["base"] } catch { XCTFail("Failed to fetch 
/// Checks the WER of the fixture transcripts against a known value.
func testLargeWER() {
    let texts = getWERTestData()
    guard let originalText = texts.0, let generatedText = texts.1 else {
        XCTFail("Fetching WER test data failed.")
        return
    }

    let expectedWER = 0.18961994278708622
    let wer = WERUtils.evaluate(
        originalTranscript: originalText,
        generatedTranscript: generatedText,
        normalizeOriginal: true
    )
    // Compare with a tolerance instead of `==` so the assertion does not hinge on the last bit
    // of a floating-point division; XCTAssertEqual also reports both values on failure.
    XCTAssertEqual(wer, expectedWER, accuracy: 1e-12, "Expected wer: \(expectedWER) but computed \(wer)")
}

/// Checks the number of edit operations `hirschberg` returns for two fixed paragraphs.
func testHirschberg() {
    let s1 = "With a rumble that echoed through the night, thunder crashed overhead, its raw power shaking the earth beneath it, leaving in its wake an exhilarating sense of awe. As rain poured down in torrents, the thunder boomed with a rhythm that seemed to speak a secret language, intertwining nature's symphony with an innovative melody that captivated all who listened."
    let s2 = "In the midst of a summer storm, thunder erupted with a booming chorus, shaking the earth beneath our feet and electrifying the air with its powerful presence. The crackling symphony of thunderbolts danced across the darkened sky, illuminating the clouds with an innovative display of nature's raw energy."
    let ops = hirschberg(Array(s1.unicodeScalars), Array(s2.unicodeScalars))
    XCTAssertEqual(ops.count, 228)
}

}