Skip to content

Commit

Permalink
Add unittests for str_to_date, fix #3556, #3557 (#3581) (#3936)
Browse files Browse the repository at this point in the history
close #3557
  • Loading branch information
ti-chi-bot committed Jun 17, 2022
1 parent c1a9569 commit 96b6e26
Show file tree
Hide file tree
Showing 4 changed files with 231 additions and 59 deletions.
87 changes: 37 additions & 50 deletions dbms/src/Common/MyTime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ std::vector<String> parseDateFormat(String format)
{
format = Poco::trimInPlace(format);

if (format.size() == 0)
if (format.empty())
return {};

if (!std::isdigit(format[0]) || !std::isdigit(format[format.size() - 1]))
Expand Down Expand Up @@ -538,7 +538,7 @@ Field parseMyDateTime(const String & str, int8_t fsp)
{
// if tz_sign is empty, it's sure that the string literal contains timezone (e.g., 2010-10-10T10:10:10Z),
// therefore we could safely skip this branch.
if (!no_absorb(seps) && !(tz_minute != "" && tz_sep == ""))
if (!no_absorb(seps) && !(!tz_minute.empty() && tz_sep.empty()))
{
// we can't absorb timezone if there is no separate between tz_hour and tz_minute
if (!tz_hour.empty())
Expand Down Expand Up @@ -910,9 +910,8 @@ size_t maxFormattedDateTimeStringLength(const String & format)
{
size_t result = 0;
bool in_pattern_match = false;
for (size_t i = 0; i < format.size(); i++)
for (char x : format)
{
char x = format[i];
if (in_pattern_match)
{
switch (x)
Expand Down Expand Up @@ -1029,7 +1028,6 @@ void MyTimeBase::check(bool allow_zero_in_date, bool allow_invalid_date) const
{
throw TiFlashException("Incorrect datetime value", Errors::Types::WrongValue);
}
return;
}

bool toCoreTimeChecked(const UInt64 & year, const UInt64 & month, const UInt64 & day, const UInt64 & hour, const UInt64 & minute, const UInt64 & second, const UInt64 & microsecond, MyDateTime & result)
Expand All @@ -1049,9 +1047,8 @@ bool toCoreTimeChecked(const UInt64 & year, const UInt64 & month, const UInt64 &
MyDateTimeFormatter::MyDateTimeFormatter(const String & layout)
{
bool in_pattern_match = false;
for (size_t i = 0; i < layout.size(); i++)
for (char x : layout)
{
char x = layout[i];
if (in_pattern_match)
{
switch (x)
Expand Down Expand Up @@ -1286,7 +1283,7 @@ struct MyDateTimeParser::Context
// The pos we are parsing from
size_t pos = 0;

Context(StringRef view_)
explicit Context(StringRef view_)
: view(std::move(view_))
{}
};
Expand Down Expand Up @@ -1331,27 +1328,27 @@ static bool parseTime12Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time)
{
// Use temp_pos instead of changing `ctx.pos` directly in case of parsing failure
size_t temp_pos = ctx.pos;
auto checkIfEnd = [&temp_pos, &ctx]() -> ParseState {
auto check_if_end = [&temp_pos, &ctx]() -> ParseState {
// To the end
if (temp_pos == ctx.view.size)
return ParseState::END_OF_FILE;
return ParseState::NORMAL;
};
auto skip_whitespaces = [&temp_pos, &ctx, &checkIfEnd]() -> ParseState {
auto skip_white_spaces = [&temp_pos, &ctx, &check_if_end]() -> ParseState {
while (temp_pos < ctx.view.size && isWhitespaceASCII(ctx.view.data[temp_pos]))
++temp_pos;
return checkIfEnd();
return check_if_end();
};
auto parseSep = [&temp_pos, &ctx, &skip_whitespaces]() -> ParseState {
if (skip_whitespaces() == ParseState::END_OF_FILE)
auto parse_sep = [&temp_pos, &ctx, &skip_white_spaces]() -> ParseState {
if (skip_white_spaces() == ParseState::END_OF_FILE)
return ParseState::END_OF_FILE;
// parse ":"
if (ctx.view.data[temp_pos] != ':')
return ParseState::FAIL;
temp_pos += 1; // move forward
return ParseState::NORMAL;
};
auto tryParse = [&]() -> ParseState {
auto try_parse = [&]() -> ParseState {
ParseState state = ParseState::NORMAL;
/// Note that we should update `time` as soon as possible, or we
/// can not get correct result for incomplete input like "12:13"
Expand All @@ -1360,7 +1357,7 @@ static bool parseTime12Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time)
// hh
size_t step = 0;
int32_t hour = 0;
if (state = skip_whitespaces(); state != ParseState::NORMAL)
if (state = skip_white_spaces(); state != ParseState::NORMAL)
return state;
std::tie(step, hour) = parseNDigits(ctx.view, temp_pos, 2);
if (step == 0 || hour > 12 || hour == 0)
Expand All @@ -1372,23 +1369,23 @@ static bool parseTime12Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time)
time.hour = hour;
temp_pos += step; // move forward

if (state = parseSep(); state != ParseState::NORMAL)
if (state = parse_sep(); state != ParseState::NORMAL)
return state;

int32_t minute = 0;
if (state = skip_whitespaces(); state != ParseState::NORMAL)
if (state = skip_white_spaces(); state != ParseState::NORMAL)
return state;
std::tie(step, minute) = parseNDigits(ctx.view, temp_pos, 2);
if (step == 0 || minute > 59)
return ParseState::FAIL;
time.minute = minute;
temp_pos += step; // move forward

if (state = parseSep(); state != ParseState::NORMAL)
if (state = parse_sep(); state != ParseState::NORMAL)
return state;

int32_t second = 0;
if (state = skip_whitespaces(); state != ParseState::NORMAL)
if (state = skip_white_spaces(); state != ParseState::NORMAL)
return state;
std::tie(step, second) = parseNDigits(ctx.view, temp_pos, 2);
if (step == 0 || second > 59)
Expand All @@ -1397,7 +1394,7 @@ static bool parseTime12Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time)
temp_pos += step; // move forward

int meridiem = 0; // 0 - invalid, 1 - am, 2 - pm
if (state = skip_whitespaces(); state != ParseState::NORMAL)
if (state = skip_white_spaces(); state != ParseState::NORMAL)
return state;
// "AM"/"PM" must be parsed as a single element
// "11:13:56a" is an invalid input for "%r".
Expand All @@ -1423,7 +1420,7 @@ static bool parseTime12Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time)
temp_pos += 2; // move forward
return ParseState::NORMAL;
};
if (auto state = tryParse(); state == ParseState::FAIL)
if (auto state = try_parse(); state == ParseState::FAIL)
return false;
// Other state, forward the `ctx.pos` and return true
ctx.pos = temp_pos;
Expand All @@ -1435,27 +1432,27 @@ static bool parseTime24Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time)
{
// Use temp_pos instead of changing `ctx.pos` directly in case of parsing failure
size_t temp_pos = ctx.pos;
auto checkIfEnd = [&temp_pos, &ctx]() -> ParseState {
auto check_if_end = [&temp_pos, &ctx]() -> ParseState {
// To the end
if (temp_pos == ctx.view.size)
return ParseState::END_OF_FILE;
return ParseState::NORMAL;
};
auto skip_whitespaces = [&temp_pos, &ctx, &checkIfEnd]() -> ParseState {
auto skip_white_spaces = [&temp_pos, &ctx, &check_if_end]() -> ParseState {
while (temp_pos < ctx.view.size && isWhitespaceASCII(ctx.view.data[temp_pos]))
++temp_pos;
return checkIfEnd();
return check_if_end();
};
auto parseSep = [&temp_pos, &ctx, &skip_whitespaces]() -> ParseState {
if (skip_whitespaces() == ParseState::END_OF_FILE)
auto parse_sep = [&temp_pos, &ctx, &skip_white_spaces]() -> ParseState {
if (skip_white_spaces() == ParseState::END_OF_FILE)
return ParseState::END_OF_FILE;
// parse ":"
if (ctx.view.data[temp_pos] != ':')
return ParseState::FAIL;
temp_pos += 1; // move forward
return ParseState::NORMAL;
};
auto tryParse = [&]() -> ParseState {
auto try_parse = [&]() -> ParseState {
ParseState state = ParseState::NORMAL;
/// Note that we should update `time` as soon as possible, or we
/// can not get correct result for incomplete input like "12:13"
Expand All @@ -1464,31 +1461,31 @@ static bool parseTime24Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time)
// hh
size_t step = 0;
int32_t hour = 0;
if (state = skip_whitespaces(); state != ParseState::NORMAL)
if (state = skip_white_spaces(); state != ParseState::NORMAL)
return state;
std::tie(step, hour) = parseNDigits(ctx.view, temp_pos, 2);
if (step == 0 || hour > 23)
return ParseState::FAIL;
time.hour = hour;
temp_pos += step; // move forward

if (state = parseSep(); state != ParseState::NORMAL)
if (state = parse_sep(); state != ParseState::NORMAL)
return state;

int32_t minute = 0;
if (state = skip_whitespaces(); state != ParseState::NORMAL)
if (state = skip_white_spaces(); state != ParseState::NORMAL)
return state;
std::tie(step, minute) = parseNDigits(ctx.view, temp_pos, 2);
if (step == 0 || minute > 59)
return ParseState::FAIL;
time.minute = minute;
temp_pos += step; // move forward

if (state = parseSep(); state != ParseState::NORMAL)
if (state = parse_sep(); state != ParseState::NORMAL)
return state;

int32_t second = 0;
if (state = skip_whitespaces(); state != ParseState::NORMAL)
if (state = skip_white_spaces(); state != ParseState::NORMAL)
return state;
std::tie(step, second) = parseNDigits(ctx.view, temp_pos, 2);
if (step == 0 || second > 59)
Expand All @@ -1498,7 +1495,7 @@ static bool parseTime24Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time)

return ParseState::NORMAL;
};
if (auto state = tryParse(); state == ParseState::FAIL)
if (auto state = try_parse(); state == ParseState::FAIL)
return false;
// Other state, forward the `ctx.pos` and return true
ctx.pos = temp_pos;
Expand Down Expand Up @@ -1544,6 +1541,9 @@ MyDateTimeParser::MyDateTimeParser(String format_)
});
break;
}
case 'm':
//"%m": Month, numeric (00..12)
[[fallthrough]];
case 'c':
{
//"%c": Month, numeric (0..12)
Expand Down Expand Up @@ -1585,9 +1585,9 @@ MyDateTimeParser::MyDateTimeParser(String format_)
time.micro_second = 0;
return true;
}
// The siffix '0' can be ignored.
// The suffix '0' can be ignored.
// "9" means 900000
while (ms > 0 && ms * 10 < 1000000)
for (size_t i = step; i < 6; i++)
{
ms *= 10;
}
Expand Down Expand Up @@ -1683,19 +1683,6 @@ MyDateTimeParser::MyDateTimeParser(String format_)
});
break;
}
case 'm':
{
//"%m": Month, numeric (00..12)
parsers.emplace_back([](MyDateTimeParser::Context & ctx, MyTimeBase & time) -> bool {
auto [step, month] = parseNDigits(ctx.view, ctx.pos, 2);
if (step == 0 || month > 12)
return false;
time.month = month;
ctx.pos += step;
return true;
});
break;
}
case 'S':
//"%S": Seconds (00..59)
[[fallthrough]];
Expand Down Expand Up @@ -1942,7 +1929,7 @@ std::optional<UInt64> MyDateTimeParser::parseAsPackedUInt(const StringRef & str_
MyDateTimeParser::Context ctx(str_view);

// TODO: can we return warnings to TiDB?
for (auto & f : parsers)
for (const auto & f : parsers)
{
// Ignore all prefix white spaces before each pattern match (TODO: handle unicode space?)
while (ctx.pos < str_view.size && isWhitespaceASCII(str_view.data[ctx.pos]))
Expand All @@ -1951,7 +1938,7 @@ std::optional<UInt64> MyDateTimeParser::parseAsPackedUInt(const StringRef & str_
if (ctx.pos == ctx.view.size)
break;

if (f(ctx, my_time) != true)
if (!f(ctx, my_time))
{
#ifndef NDEBUG
LOG_TRACE(&Poco::Logger::get("MyDateTimeParser"),
Expand Down
89 changes: 89 additions & 0 deletions dbms/src/Common/tests/gtest_mytime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,96 @@ try
// {"Tuesday 52 2001", "%W %V %Y", std::nullopt}, //
// {"Tuesday 52 2001", "%W %V %x", std::nullopt}, //
// {"Tuesday 52 2001", "%W %u %x", std::nullopt}, //

// Test cases for %b
{"10/JAN/2010", "%d/%b/%Y", MyDateTime{2010, 1, 10, 0, 0, 0, 0}}, // Right spill, case-insensitive
{"10/FeB/2010", "%d/%b/%Y", MyDateTime{2010, 2, 10, 0, 0, 0, 0}},
{"10/MAr/2010", "%d/%b/%Y", MyDateTime{2010, 3, 10, 0, 0, 0, 0}},
{"10/ApR/2010", "%d/%b/%Y", MyDateTime{2010, 4, 10, 0, 0, 0, 0}},
{"10/mAY/2010", "%d/%b/%Y", MyDateTime{2010, 5, 10, 0, 0, 0, 0}},
{"10/JuN/2010", "%d/%b/%Y", MyDateTime{2010, 6, 10, 0, 0, 0, 0}},
{"10/JUL/2010", "%d/%b/%Y", MyDateTime{2010, 7, 10, 0, 0, 0, 0}},
{"10/Aug/2010", "%d/%b/%Y", MyDateTime{2010, 8, 10, 0, 0, 0, 0}},
{"10/seP/2010", "%d/%b/%Y", MyDateTime{2010, 9, 10, 0, 0, 0, 0}},
{"10/Oct/2010", "%d/%b/%Y", MyDateTime{2010, 10, 10, 0, 0, 0, 0}},
{"10/NOV/2010", "%d/%b/%Y", MyDateTime{2010, 11, 10, 0, 0, 0, 0}},
{"10/DEC/2010", "%d/%b/%Y", MyDateTime{2010, 12, 10, 0, 0, 0, 0}},
{"10/January/2010", "%d/%b/%Y", std::nullopt}, // Test full spilling

// Test cases for %M
{"10/January/2010", "%d/%M/%Y", MyDateTime{2010, 1, 10, 0, 0, 0, 0}}, // Test full spilling
{"10/February/2010", "%d/%M/%Y", MyDateTime{2010, 2, 10, 0, 0, 0, 0}},
{"10/March/2010", "%d/%M/%Y", MyDateTime{2010, 3, 10, 0, 0, 0, 0}},
{"10/April/2010", "%d/%M/%Y", MyDateTime{2010, 4, 10, 0, 0, 0, 0}},
{"10/May/2010", "%d/%M/%Y", MyDateTime{2010, 5, 10, 0, 0, 0, 0}},
{"10/June/2010", "%d/%M/%Y", MyDateTime{2010, 6, 10, 0, 0, 0, 0}},
{"10/July/2010", "%d/%M/%Y", MyDateTime{2010, 7, 10, 0, 0, 0, 0}},
{"10/August/2010", "%d/%M/%Y", MyDateTime{2010, 8, 10, 0, 0, 0, 0}},
{"10/September/2010", "%d/%M/%Y", MyDateTime{2010, 9, 10, 0, 0, 0, 0}},
{"10/October/2010", "%d/%M/%Y", MyDateTime{2010, 10, 10, 0, 0, 0, 0}},
{"10/November/2010", "%d/%M/%Y", MyDateTime{2010, 11, 10, 0, 0, 0, 0}},
{"10/December/2010", "%d/%M/%Y", MyDateTime{2010, 12, 10, 0, 0, 0, 0}},

// Test cases for %c
// {"10/0/2010", "%d/%c/%Y", MyDateTime{2010, 0, 10, 0, 0, 0, 0}}, // TODO: Need Check NO_ZERO_DATE
{"10/1/2010", "%d/%c/%Y", MyDateTime{2010, 1, 10, 0, 0, 0, 0}},
{"10/01/2010", "%d/%c/%Y", MyDateTime{2010, 1, 10, 0, 0, 0, 0}},
{"10/001/2010", "%d/%c/%Y", std::nullopt},
{"10/13/2010", "%d/%c/%Y", std::nullopt},
{"10/12/2010", "%d/%c/%Y", MyDateTime{2010, 12, 10, 0, 0, 0, 0}},

// Test cases for %d, %e
// {"0/12/2010", "%d/%c/%Y", MyDateTime{2010, 12, 0, 0, 0, 0, 0}}, // TODO: Need Check NO_ZERO_DATE
{"1/12/2010", "%d/%c/%Y", MyDateTime{2010, 12, 1, 0, 0, 0, 0}},
{"05/12/2010", "%d/%c/%Y", MyDateTime{2010, 12, 5, 0, 0, 0, 0}},
{"05/12/2010", "%e/%c/%Y", MyDateTime{2010, 12, 5, 0, 0, 0, 0}},
{"31/12/2010", "%d/%c/%Y", MyDateTime{2010, 12, 31, 0, 0, 0, 0}},
{"32/12/2010", "%d/%c/%Y", std::nullopt},
{"30/11/2010", "%d/%c/%Y", MyDateTime{2010, 11, 30, 0, 0, 0, 0}},
{"31/11/2010", "%e/%c/%Y", MyDateTime{2010, 11, 31, 0, 0, 0, 0}},
{"28/2/2010", "%e/%c/%Y", MyDateTime{2010, 2, 28, 0, 0, 0, 0}},
{"29/2/2010", "%e/%c/%Y", MyDateTime{2010, 2, 29, 0, 0, 0, 0}},
{"29/2/2020", "%e/%c/%Y", MyDateTime{2020, 2, 29, 0, 0, 0, 0}},

// Test cases for %Y
// {"1/12/0000", "%d/%c/%Y", MyDateTime{0000, 12, 1, 0, 0, 0, 0}}, // TODO: Need Check NO_ZERO_DATE
{"1/12/01", "%d/%c/%Y", MyDateTime{2001, 12, 1, 0, 0, 0, 0}},
{"1/12/0001", "%d/%c/%Y", MyDateTime{0001, 12, 1, 0, 0, 0, 0}},
{"1/12/2020", "%d/%c/%Y", MyDateTime{2020, 12, 1, 0, 0, 0, 0}},
{"1/12/9999", "%d/%c/%Y", MyDateTime{9999, 12, 1, 0, 0, 0, 0}},

// Test cases for %f
{"01,5,2013 999999", "%d,%c,%Y %f", MyDateTime{2013, 5, 1, 0, 0, 0, 999999}},
{"01,5,2013 0", "%d,%c,%Y %f", MyDateTime{2013, 5, 1, 0, 0, 0, 0}},
{"01,5,2013 9999990000000", "%d,%c,%Y %f", MyDateTime{2013, 5, 1, 0, 0, 0, 999999}},
{"01,5,2013 1", "%d,%c,%Y %f", MyDateTime{2013, 5, 1, 0, 0, 0, 100000}},
{"01,5,2013 1230", "%d,%c,%Y %f", MyDateTime{2013, 5, 1, 0, 0, 0, 123000}},
{"01,5,2013 01", "%d,%c,%Y %f", MyDateTime{2013, 5, 1, 0, 0, 0, 10000}}, // issue 3556

// Test cases for %h, %I, %l
{"00:11:12 ", "%h:%i:%S ", std::nullopt}, // 0 is not a valid number of %h
{"01:11:12 ", "%I:%i:%S ", MyDateTime{0, 0, 0, 01, 11, 12, 0}},
{"12:11:12 ", "%l:%i:%S ", MyDateTime{0, 0, 0, 0, 11, 12, 0}},

// Test cases for %k, %H
{"00:11:12 ", "%H:%i:%S ", MyDateTime{0, 0, 0, 00, 11, 12, 0}},
{"01:11:12 ", "%k:%i:%S ", MyDateTime{0, 0, 0, 01, 11, 12, 0}},
{"12:11:12 ", "%H:%i:%S ", MyDateTime{0, 0, 0, 12, 11, 12, 0}},
{"24:11:12 ", "%k:%i:%S ", std::nullopt},

// Test cases for %i
{"00:00:12 ", "%H:%i:%S ", MyDateTime{0, 0, 0, 00, 00, 12, 0}},
{"00:01:12 ", "%H:%i:%S ", MyDateTime{0, 0, 0, 00, 01, 12, 0}},
{"00:59:12 ", "%H:%i:%S ", MyDateTime{0, 0, 0, 00, 59, 12, 0}},
{"00:60:12 ", "%H:%i:%S ", std::nullopt},

// Test cases for %s, %S
{"00:00:00 ", "%H:%i:%s ", MyDateTime{0, 0, 0, 00, 00, 00, 0}},
{"00:01:01 ", "%H:%i:%s ", MyDateTime{0, 0, 0, 00, 01, 01, 0}},
{"00:59:59 ", "%H:%i:%S ", MyDateTime{0, 0, 0, 00, 59, 59, 0}},
{"00:59:60 ", "%H:%i:%S ", std::nullopt},
};

auto result_formatter = MyDateTimeFormatter("%Y/%m/%d %T.%f");
size_t idx = 0;
for (const auto & [input, fmt, expected] : cases)
Expand Down
Loading

0 comments on commit 96b6e26

Please sign in to comment.