Skip to content

Commit

Permalink
feat: add enum validator and refactor validation a bit
Browse files Browse the repository at this point in the history
  • Loading branch information
Jan Kühnlein committed Jan 24, 2024
1 parent c7b325f commit c58b047
Show file tree
Hide file tree
Showing 10 changed files with 210 additions and 90 deletions.
14 changes: 6 additions & 8 deletions app/temporal/src/activities.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,11 @@ import csv from "csv";
import { chunk } from "lodash";
import XLSX from "xlsx";
import { ColumnConfig } from "./domain/ColumnConfig";
import { DataAnalyzer, DataMappingRecommendation } from "./domain/DataAnalyzer";
import { ValidatorType } from "./domain/validators";
import {
ColumnValidators,
DataAnalyzer,
DataMappingRecommendation,
} from "./domain/DataAnalyzer";
import { FileStore } from "./infrastructure/FileStore";
import { Mapping } from "./workflows/importer.workflow";
export interface DownloadSourceFileParams {
Expand All @@ -17,11 +20,6 @@ export interface DownloadSourceFileReturnType {
localFilePath: string;
}

export type ValidatorColumns = Record<
ValidatorType,
{ column: string; regex?: string | undefined }[]
>;

export function makeActivities(
fileStore: FileStore,
dataAnalyzer: DataAnalyzer
Expand Down Expand Up @@ -148,7 +146,7 @@ export function makeActivities(
bucket: string;
fileReference: string;
statsFileReference: string;
validatorColumns: ValidatorColumns;
validatorColumns: ColumnValidators;
}) => {
const referenceId = params.fileReference.split("-")[1].split(".")[0];
const fileData = await fileStore.getFile(
Expand Down
9 changes: 8 additions & 1 deletion app/temporal/src/domain/ColumnValidation.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,12 @@ import { ValidatorType } from "./validators";

/**
 * Base shape of the validation configuration attached to a single column.
 * More specific configurations in this file extend it with extra fields.
 */
export interface ColumnValidation {
// Kind of validator this configuration is meant for (a ValidatorType).
type: ValidatorType;
// NOTE(review): RegexColumnValidation below declares `regex` as required;
// this optional field looks like the pre-refactor line of the diff —
// confirm whether it is still part of the base interface.
regex?: string;
}

/**
 * Column validation configuration that carries a mandatory pattern.
 */
export interface RegexColumnValidation extends ColumnValidation {
// Pattern source as a string (the spec uses e.g. "^[0-9]{5}$").
regex: string;
}

/**
 * Column validation configuration that carries the list of permitted values.
 */
export interface EnumerationColumnValidation extends ColumnValidation {
// Allowed cell values; EnumValidator reports an error for any cell that is
// not exactly one of these strings.
values: string[];
}
138 changes: 101 additions & 37 deletions app/temporal/src/domain/DataAnalyzer.spec.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
import { ColumnConfig } from "./ColumnConfig";
import { DataAnalyzer, Stats } from "./DataAnalyzer";
import { ValidatorType } from "./validators";
import {
EnumerationColumnValidation,
RegexColumnValidation,
} from "./ColumnValidation";
import {
ColumnValidators,
DataAnalyzer,
SourceFileStatsPerColumn,
} from "./DataAnalyzer";

describe("DataAnalyzer", () => {
const analyzer = new DataAnalyzer();
Expand Down Expand Up @@ -94,7 +101,7 @@ describe("DataAnalyzer", () => {

describe("validation", () => {
it("should validate required columns", () => {
const rowsWithMissingName = [
const rowsWithMissingName: Record<string, string | number | null>[] = [
{
__rowId: 0,
name: "John",
Expand All @@ -103,11 +110,10 @@ describe("DataAnalyzer", () => {
{ __rowId: 2, age: 25 },
{ __rowId: 3, name: "" },
{ __rowId: 4, name: null },
{ __rowId: 5, name: undefined },
];
const validatorColumns = {
required: [{ column: "name" }],
} as Record<ValidatorType, { column: string; regex?: string }[]>;
required: [{ column: "name", config: { type: "required" } }],
} as ColumnValidators;
const stats = {};
const result = analyzer.processDataValidations(
rowsWithMissingName,
Expand Down Expand Up @@ -155,16 +161,6 @@ describe("DataAnalyzer", () => {
},
],
},
{
rowId: 5,
column: "name",
errors: [
{
message: "value is required",
type: "required",
},
],
},
]);
});

Expand All @@ -176,14 +172,16 @@ describe("DataAnalyzer", () => {
},
{ __rowId: 1, name: "John" },
{ __rowId: 2, name: "Egon" },
{ __rowId: 3 },
{ __rowId: 4 },
{ __rowId: 3, name: "" },
{ __rowId: 4, name: "" },
{ __rowId: 5, name: "John" },
];
const validatorColumns = {
unique: [{ column: "name" }],
} as Record<ValidatorType, { column: string; regex?: string }[]>;
const stats: Stats = { name: { nonunique: { John: 3, undefined: 2 } } };
unique: [{ column: "name", config: { type: "unique" } }],
} as ColumnValidators;
const stats: SourceFileStatsPerColumn = {
name: { nonunique: { John: 3, "": 2 } },
};
const result = analyzer.processDataValidations(
rowsWithDuplicateValues,
validatorColumns,
Expand Down Expand Up @@ -251,12 +249,20 @@ describe("DataAnalyzer", () => {
},
{ __rowId: 1, Postleitzahl: "90596" },
{ __rowId: 2, Postleitzahl: "x90596" },
{ __rowId: 3 },
{ __rowId: 3, Postleitzahl: "" },
{ __rowId: 4, Postleitzahl: "123" },
];
const validatorColumns = {
regex: [{ column: "Postleitzahl", regex: "^[0-9]{5}$" }],
} as Record<ValidatorType, { column: string; regex?: string }[]>;
regex: [
{
column: "Postleitzahl",
config: {
type: "regex",
regex: "^[0-9]{5}$",
} as RegexColumnValidation,
},
],
} as ColumnValidators;
const stats = {};
const result = analyzer.processDataValidations(
rowsWithRegexValues,
Expand Down Expand Up @@ -309,12 +315,12 @@ describe("DataAnalyzer", () => {
},
{ __rowId: 2, phone: "+49 151/40604777 " },
{ __rowId: 3, phone: "foo" },
{ __rowId: 4 },
{ __rowId: 4, phone: "" },
];

const validatorColumns = {
phone: [{ column: "phone" }],
} as Record<ValidatorType, { column: string; regex?: string }[]>;
phone: [{ column: "phone", config: { type: "phone" } }],
} as ColumnValidators;
const stats = {};
const result = analyzer.processDataValidations(
rowsWithPhoneValues,
Expand Down Expand Up @@ -352,11 +358,11 @@ describe("DataAnalyzer", () => {
{ __rowId: 2, email: "fiedlefl@gmail" },
{ __rowId: 3, email: "fiedlefl@[email protected]" },
{ __rowId: 4, email: "foo" },
{ __rowId: 5 },
{ __rowId: 5, email: "" },
];
const validatorColumns = {
email: [{ column: "email" }],
} as Record<ValidatorType, { column: string; regex?: string }[]>;
email: [{ column: "email", config: { type: "email" } }],
} as ColumnValidators;
const stats = {};
const result = analyzer.processDataValidations(
rowsWithEmailValues,
Expand Down Expand Up @@ -410,13 +416,23 @@ describe("DataAnalyzer", () => {
it("should validate multiple validations", () => {
const rowsWithDuplicateValues = [{ __rowId: 0 }, { __rowId: 1 }];
const validatorColumns = {
required: [{ column: "name" }],
unique: [{ column: "name" }],
phone: [{ column: "name" }],
email: [{ column: "name" }],
regex: [{ column: "name", regex: "^[0-9]{5}$" }],
} as Record<ValidatorType, { column: string; regex?: string }[]>;
const stats: Stats = { name: { nonunique: { undefined: 2 } } };
required: [{ column: "name", config: { type: "required" } }],
unique: [{ column: "name", config: { type: "unique" } }],
phone: [{ column: "name", config: { type: "phone" } }],
email: [{ column: "name", config: { type: "email" } }],
regex: [
{
column: "name",
config: {
type: "regex",
regex: "^[0-9]{5}$",
} as RegexColumnValidation,
},
],
} as ColumnValidators;
const stats: SourceFileStatsPerColumn = {
name: { nonunique: { undefined: 2 } },
};
const result = analyzer.processDataValidations(
rowsWithDuplicateValues,
validatorColumns,
Expand Down Expand Up @@ -479,6 +495,54 @@ describe("DataAnalyzer", () => {
},
]);
});

it("should validate enum values", () => {
  // Rows 0 and 1 hold allowed values; row 2 holds a value outside the
  // enumeration and row 3 an empty string, both of which must be reported.
  const rowsWithDepartmentValues = [
    { __rowId: 0, department: "Department 1" },
    { __rowId: 1, department: "Department 2" },
    { __rowId: 2, department: "Department 3" },
    { __rowId: 3, department: "" },
  ];
  const validatorColumns = {
    enum: [
      {
        column: "department",
        config: {
          type: "enum",
          values: ["Department 1", "Department 2"],
        } as EnumerationColumnValidation,
      },
    ],
  } as ColumnValidators;
  // No uniqueness statistics are needed for the enum validator.
  const stats = {};
  const result = analyzer.processDataValidations(
    rowsWithDepartmentValues,
    validatorColumns,
    stats
  );
  expect(result).toEqual([
    {
      rowId: 2,
      column: "department",
      errors: [
        {
          message: "value is not a valid enum",
          type: "enum",
        },
      ],
    },
    {
      rowId: 3,
      column: "department",
      errors: [
        {
          message: "value is not a valid enum",
          type: "enum",
        },
      ],
    },
  ]);
});
});

it("should return stats", () => {
Expand Down
22 changes: 15 additions & 7 deletions app/temporal/src/domain/DataAnalyzer.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import Fuse from "fuse.js";
import type { ValidatorColumns } from "../activities";
import { ColumnConfig } from "./ColumnConfig";
import { ColumnValidation } from "./ColumnValidation";
import { ValidationError } from "./ValidationError";
import { ValidatorType, validators } from "./validators";

Expand All @@ -10,7 +10,15 @@ export interface DataMappingRecommendation {
confidence: number;
}

export type Stats = Record<string, { nonunique: Record<string, number> }>;
/**
 * Statistics of a source file, keyed by column name.
 * `nonunique` maps a cell value (as a string key) to a count; the spec
 * fixture uses e.g. `{ John: 3, "": 2 }`.
 * NOTE(review): presumably only values occurring more than once are
 * recorded — confirm against getStats.
 */
export type SourceFileStatsPerColumn = Record<
string,
{ nonunique: Record<string, number> }
>;

/**
 * For every validator type, the columns that validator has to check,
 * each paired with the validation configuration for that column.
 */
export type ColumnValidators = Record<
ValidatorType,
{ column: string; config: ColumnValidation }[]
>;

export class DataAnalyzer {
constructor() {}
Expand Down Expand Up @@ -58,9 +66,9 @@ export class DataAnalyzer {
}

public processDataValidations(
data: Record<string, unknown>[],
validatorColumns: ValidatorColumns,
stats: Stats
data: Record<string, string | number | null>[],
validatorColumns: ColumnValidators,
stats: SourceFileStatsPerColumn
): { rowId: number; column: string; errors: ValidationError[] }[] {
const chunkErrors: {
rowId: number;
Expand Down Expand Up @@ -101,9 +109,9 @@ export class DataAnalyzer {
public getStats(
data: Record<string, unknown>[],
columnsToVerify: string[]
): Stats {
): SourceFileStatsPerColumn {
// nonunique
const stats = {} as Stats;
const stats = {} as SourceFileStatsPerColumn;
for (const column of columnsToVerify) {
const duplicates = new Map();
data.forEach((row) => {
Expand Down
6 changes: 4 additions & 2 deletions app/temporal/src/domain/validators/EmailValidator.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
import { Validator } from ".";
import { ColumnValidation } from "../ColumnValidation";
import { ValidationError } from "../ValidationError";

const EMAIL_REGEX =
/^[-!#$%&'*+\/0-9=?A-Z^_a-z`{|}~](\.?[-!#$%&'*+\/0-9=?A-Z^_a-z`{|}~])*@[a-zA-Z0-9](-*\.?[a-zA-Z0-9])*\.[a-zA-Z](-?[a-zA-Z0-9])+$/;

export class EmailValidator {
export class EmailValidator implements Validator {
validate(
row: Record<string, unknown>,
columnConfig: { column: string; regex?: string }[]
columnConfig: { column: string; config: ColumnValidation }[]
): Record<string, ValidationError> {
const errors: Record<string, ValidationError> = {};
const columnsToValidate = columnConfig.map((item) => item.column);
Expand Down
27 changes: 27 additions & 0 deletions app/temporal/src/domain/validators/EnumValidator.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import { Validator } from ".";
import { EnumerationColumnValidation } from "../ColumnValidation";
import { ValidationError } from "../ValidationError";

/**
 * Validator that accepts a cell only when it is a non-empty string contained
 * in the configured enumeration values.
 */
export class EnumValidator implements Validator {
  /**
   * Checks every configured column of one row.
   *
   * @param row - Row data keyed by column name.
   * @param columnConfigs - Columns to check, each with its allowed values.
   * @returns A map from column name to the error found for it; columns whose
   *   value is allowed do not appear in the map.
   */
  public validate(
    row: Record<string, string | number | null>,
    columnConfigs: { column: string; config: EnumerationColumnValidation }[]
  ): Record<string, ValidationError> {
    const failures: Record<string, ValidationError> = {};
    columnConfigs.forEach(({ column, config }) => {
      const cell = row[column];
      // Missing, null, numeric and empty-string cells are never valid, even
      // if the configured values happen to contain a matching entry.
      const isAllowed =
        typeof cell === "string" &&
        cell !== "" &&
        config.values.includes(cell);
      if (!isAllowed) {
        failures[column] = {
          type: "enum",
          message: "value is not a valid enum",
        };
      }
    });
    return failures;
  }
}
Loading

0 comments on commit c58b047

Please sign in to comment.