Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make Fuzz.string generate unicode strings. #92

Merged
merged 6 commits into from
May 10, 2020
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 50 additions & 25 deletions src/Fuzz.elm
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ import Fuzz.Internal as Internal
, Valid
, ValidFuzzer
, combineValid
, frequencyList
, invalidReason
)
import Lazy
Expand Down Expand Up @@ -280,41 +281,65 @@ asciiCharGenerator =
Random.map Char.fromCode (Random.int 32 126)


{-| Generator that yields one of three whitespace characters: space, tab or
newline. The `Maybe` returned by `Random.sample` is unwrapped with a space
as the fallback.
-}
whitespaceCharGenerator : Generator Char
whitespaceCharGenerator =
    [ ' ', '\t', '\n' ]
        |> Random.sample
        |> Random.map (Maybe.withDefault ' ')
{-| Weighted character generators used to build unicode strings, expressed as
a non-empty list (first entry plus the rest).

Printable ASCII carries weight 4; whitespace, combining diacritical marks and
emoji carry weight 1 each.

-}
unicodeCharGeneratorFrequencies : ( ( Float, Generator Char ), List ( Float, Generator Char ) )
unicodeCharGeneratorFrequencies =
    let
        -- Sample from `candidates`, unwrapping the `Maybe` that
        -- `Random.sample` returns with the given fallback.
        sampleOr fallback candidates =
            Random.sample candidates
                |> Random.map (Maybe.withDefault fallback)

        circumflex =
            '̂'

        tilde =
            '̃'

        diaeresis =
            '̈'

        whitespaceChars =
            sampleOr ' ' [ ' ', '\t', '\n' ]

        combiningMarks =
            sampleOr circumflex [ circumflex, tilde, diaeresis ]

        emojiChars =
            sampleOr '❤' [ '🌈', '❤', '🔥' ]
    in
    ( ( 4, asciiCharGenerator )
    , [ ( 1, whitespaceChars )
      , ( 1, combiningMarks )
      , ( 1, emojiChars )
      ]
    )


{-| Generates random printable ASCII strings of up to 1000 characters.
{-| Generates random printable unicode strings of up to 1000 characters.

Shorter strings are more common, especially the empty string.

-}
string : Fuzzer String
string =
let
asciiGenerator : Generator String
asciiGenerator =
Random.frequency
( 3, Random.int 1 10 )
[ ( 0.2, Random.constant 0 )
, ( 1, Random.int 11 50 )
, ( 1, Random.int 50 1000 )
]
|> Random.andThen (Random.lengthString asciiCharGenerator)

whitespaceGenerator : Generator String
whitespaceGenerator =
Random.int 1 10
|> Random.andThen (Random.lengthString whitespaceCharGenerator)
unicodeGenerator : Generator String
unicodeGenerator =
let
( freq, rest ) =
unicodeCharGeneratorFrequencies
in
frequencyList
(Random.frequency
( 3, Random.int 1 10 )
[ ( 0.2, Random.constant 0 )
, ( 1, Random.int 11 50 )
, ( 1, Random.int 50 1000 )
]
)
Copy link
Collaborator

@harrysarson harrysarson Aug 20, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I found this function slightly hard to grok and, whilst having a play with it to get my head round what the code does, rewrote it slightly. I believe the function might be slightly easier to read in this form:

{-| Fuzzer for unicode strings.

The number of generated characters is drawn from weighted buckets: 1 to 10
(weight 3), exactly 0 (weight 0.2), 11 to 50 (weight 1) and 50 to 1000
(weight 1). The characters themselves come from
`unicodeCharGeneratorFrequencies` via `frequencyList`.

-}
string : Fuzzer String
string =
    let
        lengths =
            Random.frequency
                ( 3, Random.int 1 10 )
                [ ( 0.2, Random.constant 0 )
                , ( 1, Random.int 11 50 )
                , ( 1, Random.int 50 1000 )
                ]

        ( headFrequency, tailFrequencies ) =
            unicodeCharGeneratorFrequencies

        chars =
            frequencyList lengths headFrequency tailFrequencies
    in
    custom (Random.map String.fromList chars) Simplify.string

Otherwise this patch looks good to me :)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks good to me :) Took it as you wrote it and ran it through elm-format.

freq
rest
|> Random.map String.fromList
in
custom
(Random.frequency
( 9, asciiGenerator )
[ ( 1, whitespaceGenerator )
]
)
Simplify.string
custom unicodeGenerator Simplify.string


{-| Given a fuzzer of a type, create a fuzzer of a maybe for that type.
Expand Down
53 changes: 52 additions & 1 deletion src/Fuzz/Internal.elm
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
module Fuzz.Internal exposing (Fuzzer, Valid, ValidFuzzer, combineValid, invalidReason, map)
module Fuzz.Internal exposing (Fuzzer, Valid, ValidFuzzer, combineValid, frequencyList, invalidReason, map)

import Lazy
import Lazy.List exposing (LazyList)
import MicroRandomExtra
import Random exposing (Generator)
import RoseTree exposing (RoseTree(..))

Expand Down Expand Up @@ -84,3 +85,53 @@ invalidReason valid =

Err reason ->
Just reason


{-| Build a generator of lists. The length of each list is drawn from
`lengthGenerator`; its elements come from one strategy that is chosen once
per generated list (via `MicroRandomExtra.sample`) out of:

  - one element, drawn with `MicroRandomExtra.frequency` over all the given
    generators, repeated for the whole list,
  - one of the given generators on its own,
  - `MicroRandomExtra.frequency` over two picked generators (the two picks
    are made independently, so the same generator may be picked twice), or
  - `MicroRandomExtra.frequency` over all the given generators.

`pair` and `pairs` together form the non-empty list of weighted generators.

-}
frequencyList : Generator Int -> ( Float, Generator a ) -> List ( Float, Generator a ) -> Generator (List a)
frequencyList lengthGenerator pair pairs =
    let
        -- Every given generator combined, honouring the given weights.
        combined : Generator a
        combined =
            MicroRandomExtra.frequency pair pairs

        -- Turn a weighted entry into "a constant generator of that entry",
        -- keeping its weight, so an entry can itself be picked by weight.
        asWeightedConstant (( weight, _ ) as entry) =
            ( weight, Random.constant entry )

        pickEntry : Generator ( Float, Generator a )
        pickEntry =
            MicroRandomExtra.frequency
                (asWeightedConstant pair)
                (List.map asWeightedConstant pairs)

        -- Draw a single element and repeat it for the whole list.
        repeatedElement =
            Random.map Random.constant combined

        singleGenerator =
            Random.map Tuple.second pickEntry

        twoGenerators =
            Random.map2
                (\first second -> MicroRandomExtra.frequency first [ second ])
                pickEntry
                pickEntry

        allGenerators =
            Random.constant combined

        strategy : Generator (Generator a)
        strategy =
            MicroRandomExtra.sample
                [ repeatedElement
                , singleGenerator
                , twoGenerators
                , allGenerators
                ]
                |> Random.map (Maybe.withDefault repeatedElement)
                |> Random.andThen identity
    in
    -- The length is drawn first, then the strategy, then the elements.
    Random.map2 (\len gen -> Random.list len gen) lengthGenerator strategy
        |> Random.andThen identity
74 changes: 74 additions & 0 deletions tests/src/FuzzerTests.elm
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ fuzzerTests =
Expect.equal valNoSimplify valWithSimplify
, simplifyingTests
, manualFuzzerTests
, unicodeStringFuzzerTests
]
]

Expand Down Expand Up @@ -287,3 +288,76 @@ whitespace =
|> Fuzz.oneOf
|> Fuzz.list
|> Fuzz.map String.fromList


unicodeStringFuzzerTests : Test
unicodeStringFuzzerTests =
    let
        -- How these statistical tests work:
        --
        -- The tools at our disposal are:
        --   - "for all" (∀), in the form of ordinary fuzz tests
        --   - "exists not" (∃𝑥¬), in the form of expectTestToFail
        --
        -- What we want to show is
        --   `exists (fuzzed string) such that ((fuzzed string) contains (specific string))`.
        -- Since ¬¬𝑥 <=> 𝑥, that is the same as
        --   `exists (fuzzed string) such that (not (not ((fuzzed string) contains (specific string))))`
        -- and because
        --   `expectTestToFail x` <=> `exists (fuzzed string) such that (not x)`
        -- the inner fuzz test has to assert
        --   `(not ((fuzzed string) contains (specific string)))`.
        generatesStringContaining description needle =
            test description <|
                \() ->
                    expectTestToFail <|
                        fuzz string description <|
                            \str ->
                                String.contains needle str
                                    |> Expect.equal False

        -- Counts how many times, starting from the head of the list, an
        -- element is immediately followed by an equal element.
        leadingRunLength chars =
            case chars of
                first :: second :: rest ->
                    if first == second then
                        1 + leadingRunLength (second :: rest)

                    else
                        0

                _ ->
                    0
    in
    describe "unicode string fuzzer"
        [ generatesStringContaining "generates ascii" "E"
        , generatesStringContaining "generates whitespace" "\t"
        , generatesStringContaining "generates combining diacritical marks" "̃"
        , generatesStringContaining "generates emoji" "🔥"
        , test "generates long strings with a single character" <|
            \() ->
                expectTestToFail <|
                    fuzz string "generates long strings with a single character" <|
                        \str ->
                            -- We want this property to hold at least once, but there is no
                            -- "expect to pass at least once" helper, so the inner expectation
                            -- is reversed and wrapped in expectTestToFail.
                            (leadingRunLength (String.toList str) > 10)
                                |> Expect.equal False
        , test "the String.reverse bug that prevented us from releasing unicode string fuzzers in August 2017 is now fixed" <|
            -- If characters that span more than one UTF-16 code unit work, this version of the unicode string fuzzer is good to go.
            \() -> String.reverse "🔥" |> Expect.equal "🔥"

        --, test "String.reverse implements unicode string reversing correctly" <|
        --    -- String.reverse still doesn't properly implement unicode string reversing, so combining emojis like skin tones or families break.
        --    -- Here's a test that should pass, since these emoji families are supposed to be counted as single elements when reversing the string. At the time of writing, the result is instead a per-character string reversal, which renders as four emojis after each other "👦👦👩👩" (plus a bunch of non-printable characters in-between).
        --    \() -> "👩‍👩‍👦‍👦" |> String.reverse |> Expect.equal "👩‍👩‍👦‍👦"
        ]