From 1163e9a761c55e6b06f3bcaa31c61e5150431355 Mon Sep 17 00:00:00 2001 From: David Korczynski Date: Wed, 15 Mar 2023 04:55:42 -0700 Subject: [PATCH 1/6] Add OSS-Fuzz set up Signed-off-by: David Korczynski --- tests/fuzz/README.md | 39 +++++++++++++++++++++++ tests/fuzz/fuzz_markdown.py | 21 +++++++++++++ tests/fuzz/fuzz_markdown_extended.py | 47 ++++++++++++++++++++++++++++ 3 files changed, 107 insertions(+) create mode 100644 tests/fuzz/README.md create mode 100644 tests/fuzz/fuzz_markdown.py create mode 100644 tests/fuzz/fuzz_markdown_extended.py diff --git a/tests/fuzz/README.md b/tests/fuzz/README.md new file mode 100644 index 00000000..fa0438ef --- /dev/null +++ b/tests/fuzz/README.md @@ -0,0 +1,39 @@ +# Fuzzing by way of OSS-Fuzz + +Fuzzing set up that is run by OSS-Fuzz. The relevant files in the OSS-Fuzz +repository is [here](https://github.com/google/oss-fuzz/tree/master/projects/markdown-it-py). + +The fuzzers require the [Atheris](https://pypi.org/project/atheris/) package. +You need this package to run the fuzzers locally (i.e. non oss-fuzz). + +## Building by way of OSS-Fuzz +The following steps will build the fuzzers using the OSS-Fuzz infrastructure: +``` +git clone https://github.com/google/oss-fuzz +cd oss-fuzz +python3 infra/helper.py build_fuzzers markdown-it-py + +# The fuzzers are now placed in build/out/markdown-it-py +# To run the fuzz_markdown fuzzer: +python3 infra/helper.py run_fuzzer markdown-it-py fuzz_markdown +``` + +## Extending so fuzzers run on OSS-Fuzz +The build script on the OSS-Fuzz repository for markdown-it-py fuzzers is +here: https://github.com/google/oss-fuzz/blob/master/projects/markdown-it-py/build.sh + +Any file that matches `fuzz_*.py` in this repository will be build and run on +OSS-Fuzz. Thus, to extend with a new fuzzer simply name it accordingly. 
+ +## Reproducing issues +In order to reproduce an issue reported by OSS-Fuzz, you need to: +1) Download the `Minimized Testcase` (which is a file or raw bytes) from the +detailed OSS-Fuzz reports. Example link: https://oss-fuzz.com/testcase-detail/5424112454729728 +2) Build the fuzzers as shown above +3) Use the command: + +``` +python3 infra/helper.py reproduce markdown-it-py {FUZZER_NAME} {PATH_TO_MINIMIZED_TESTCASE} +``` + +For a more thorough guide on reproducing, see: https://google.github.io/oss-fuzz/advanced-topics/reproducing/ diff --git a/tests/fuzz/fuzz_markdown.py b/tests/fuzz/fuzz_markdown.py new file mode 100644 index 00000000..96e26f03 --- /dev/null +++ b/tests/fuzz/fuzz_markdown.py @@ -0,0 +1,21 @@ +import sys +import atheris +from markdown_it import MarkdownIt + + +def TestOneInput(data): + fdp = atheris.FuzzedDataProvider(data) + md = MarkdownIt() + raw_markdown = fdp.ConsumeUnicodeNoSurrogates(sys.maxsize) + md.parse(raw_markdown) + md.render(raw_markdown) + + +def main(): + atheris.instrument_all() + atheris.Setup(sys.argv, TestOneInput) + atheris.Fuzz() + + +if __name__ == "__main__": + main() diff --git a/tests/fuzz/fuzz_markdown_extended.py b/tests/fuzz/fuzz_markdown_extended.py new file mode 100644 index 00000000..93447de0 --- /dev/null +++ b/tests/fuzz/fuzz_markdown_extended.py @@ -0,0 +1,47 @@ +import sys +import atheris + +# Beautified from auto-generated fuzzer at: +# https://github.com/ossf/fuzz-introspector/pull/872#issuecomment-1450847118 +# Auto-fuzz heuristics used: py-autofuzz-heuristics-4.1 +# Imports by the generated code +import markdown_it + +def TestOneInput(data): + fdp = atheris.FuzzedDataProvider(data) + val_1 = fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 1024)) + val_2 = fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 1024)) + val_3 = fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 256)) + val_4 = fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 256)) + val_5 = 
fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 256)) + val_6 = fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 256)) + val_7 = fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 256)) + val_8 = fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 256)) + val_9 = fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 256)) + val_10 = fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 256)) + + try: + c1 = markdown_it.main.MarkdownIt() + c1.render(val_1) + c1.parse(val_2) + c1.renderInline(val_3) + c1.parseInline(val_4) + c1.normalizeLink(val_5) + c1.normalizeLinkText(val_6) + c1.disable(val_7) + c1.enable(val_8) + c1.validateLink(val_9) + c1.configure(val_10) + except(ValueError,KeyError,TypeError,): + # Exceptions thrown by the hit code. + pass + + +def main(): + atheris.instrument_all() + atheris.Setup(sys.argv, TestOneInput) + atheris.Fuzz() + + +if __name__ == "__main__": + main() From 0da43091c3ce26409a25d9fd99bbfa2159044790 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 15 Mar 2023 11:53:44 +0000 Subject: [PATCH 2/6] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/fuzz/fuzz_markdown.py | 20 ++++---- tests/fuzz/fuzz_markdown_extended.py | 68 +++++++++++++++------------- 2 files changed, 48 insertions(+), 40 deletions(-) diff --git a/tests/fuzz/fuzz_markdown.py b/tests/fuzz/fuzz_markdown.py index 96e26f03..d78ef697 100644 --- a/tests/fuzz/fuzz_markdown.py +++ b/tests/fuzz/fuzz_markdown.py @@ -1,21 +1,23 @@ import sys + import atheris + from markdown_it import MarkdownIt def TestOneInput(data): - fdp = atheris.FuzzedDataProvider(data) - md = MarkdownIt() - raw_markdown = fdp.ConsumeUnicodeNoSurrogates(sys.maxsize) - md.parse(raw_markdown) - md.render(raw_markdown) + fdp = atheris.FuzzedDataProvider(data) + md = MarkdownIt() + raw_markdown = fdp.ConsumeUnicodeNoSurrogates(sys.maxsize) + 
md.parse(raw_markdown) + md.render(raw_markdown) def main(): - atheris.instrument_all() - atheris.Setup(sys.argv, TestOneInput) - atheris.Fuzz() + atheris.instrument_all() + atheris.Setup(sys.argv, TestOneInput) + atheris.Fuzz() if __name__ == "__main__": - main() + main() diff --git a/tests/fuzz/fuzz_markdown_extended.py b/tests/fuzz/fuzz_markdown_extended.py index 93447de0..4ba749ee 100644 --- a/tests/fuzz/fuzz_markdown_extended.py +++ b/tests/fuzz/fuzz_markdown_extended.py @@ -1,4 +1,5 @@ import sys + import atheris # Beautified from auto-generated fuzzer at: @@ -7,41 +8,46 @@ # Imports by the generated code import markdown_it + def TestOneInput(data): - fdp = atheris.FuzzedDataProvider(data) - val_1 = fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 1024)) - val_2 = fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 1024)) - val_3 = fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 256)) - val_4 = fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 256)) - val_5 = fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 256)) - val_6 = fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 256)) - val_7 = fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 256)) - val_8 = fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 256)) - val_9 = fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 256)) - val_10 = fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 256)) - - try: - c1 = markdown_it.main.MarkdownIt() - c1.render(val_1) - c1.parse(val_2) - c1.renderInline(val_3) - c1.parseInline(val_4) - c1.normalizeLink(val_5) - c1.normalizeLinkText(val_6) - c1.disable(val_7) - c1.enable(val_8) - c1.validateLink(val_9) - c1.configure(val_10) - except(ValueError,KeyError,TypeError,): - # Exceptions thrown by the hit code. 
- pass + fdp = atheris.FuzzedDataProvider(data) + val_1 = fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 1024)) + val_2 = fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 1024)) + val_3 = fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 256)) + val_4 = fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 256)) + val_5 = fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 256)) + val_6 = fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 256)) + val_7 = fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 256)) + val_8 = fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 256)) + val_9 = fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 256)) + val_10 = fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 256)) + + try: + c1 = markdown_it.main.MarkdownIt() + c1.render(val_1) + c1.parse(val_2) + c1.renderInline(val_3) + c1.parseInline(val_4) + c1.normalizeLink(val_5) + c1.normalizeLinkText(val_6) + c1.disable(val_7) + c1.enable(val_8) + c1.validateLink(val_9) + c1.configure(val_10) + except ( + ValueError, + KeyError, + TypeError, + ): + # Exceptions thrown by the hit code. 
+ pass def main(): - atheris.instrument_all() - atheris.Setup(sys.argv, TestOneInput) - atheris.Fuzz() + atheris.instrument_all() + atheris.Setup(sys.argv, TestOneInput) + atheris.Fuzz() if __name__ == "__main__": - main() + main() From d05a346dd34e7d2bda5735d3307de2458290cca5 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Sun, 26 Mar 2023 11:00:10 +0200 Subject: [PATCH 3/6] Try adding CI job --- .github/workflows/fuzz.yml | 25 +++++++++++++++++++++++++ tox.ini | 10 ++++++++++ 2 files changed, 35 insertions(+) create mode 100644 .github/workflows/fuzz.yml diff --git a/.github/workflows/fuzz.yml b/.github/workflows/fuzz.yml new file mode 100644 index 00000000..cbb11dbd --- /dev/null +++ b/.github/workflows/fuzz.yml @@ -0,0 +1,25 @@ +name: fuzzing +on: + push: + branches: [master] + pull_request: + schedule: + - cron: '0 0 * * 0' # every week + +jobs: + + basic: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: 3.8 + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install atheris + pip install . 
+ - name: run fuzzing + run: python tests/fuzz/fuzz_markdown.py diff --git a/tox.ini b/tox.ini index 85a7179b..927a70cd 100644 --- a/tox.ini +++ b/tox.ini @@ -60,6 +60,16 @@ commands = dot -Tsvg -o "{toxworkdir}/prof/output.svg" "{toxworkdir}/prof/output.dot" python -c 'import pathlib; print("profiler svg output under file://\{0\}".format(pathlib.Path(r"{toxworkdir}") / "prof" / "output.svg"))' +[testenv:fuzz] +description = run fuzzer +deps = atheris +allowlist_externals = + git +commands_pre = + git clone --single-branch https://github.com/google/oss-fuzz {envtmpdir}/oss-fuzz + python {envtmpdir}/oss-fuzz/infra/helper.py build_fuzzers markdown-it-py +commands = python {envtmpdir}/oss-fuzz/infra/helper.py run_fuzzer markdown-it-py fuzz_markdown + [flake8] max-line-length = 100 extend-ignore = E203 From 6d27f50ff15739c104b3b34a2967fd661d51121c Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Tue, 28 Mar 2023 15:39:10 +0200 Subject: [PATCH 4/6] Update fuzz.yml --- .github/workflows/fuzz.yml | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/.github/workflows/fuzz.yml b/.github/workflows/fuzz.yml index cbb11dbd..a91a81db 100644 --- a/.github/workflows/fuzz.yml +++ b/.github/workflows/fuzz.yml @@ -3,23 +3,24 @@ on: push: branches: [master] pull_request: - schedule: - - cron: '0 0 * * 0' # every week jobs: - - basic: + Fuzzing: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - name: Set up Python - uses: actions/setup-python@v4 + - name: Build Fuzzers + id: build + uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master + with: + oss-fuzz-project-name: 'markdown-it-py' + - name: Run Fuzzers + uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master + with: + oss-fuzz-project-name: 'markdown-it-py' + fuzz-seconds: 600 + - name: Upload Crash + uses: actions/upload-artifact@v3 + if: failure() && steps.build.outcome == 'success' with: - python-version: 3.8 - - name: Install dependencies - run: | - 
python -m pip install --upgrade pip - pip install atheris - pip install . - - name: run fuzzing - run: python tests/fuzz/fuzz_markdown.py + name: artifacts + path: ./out/artifacts From e78ec69531a53402362675553dfbe2416bd50a9f Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Tue, 28 Mar 2023 16:00:02 +0200 Subject: [PATCH 5/6] Update fuzz.yml --- .github/workflows/fuzz.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/fuzz.yml b/.github/workflows/fuzz.yml index a91a81db..049ca07b 100644 --- a/.github/workflows/fuzz.yml +++ b/.github/workflows/fuzz.yml @@ -13,10 +13,12 @@ jobs: uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master with: oss-fuzz-project-name: 'markdown-it-py' + language: python - name: Run Fuzzers uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master with: oss-fuzz-project-name: 'markdown-it-py' + language: python fuzz-seconds: 600 - name: Upload Crash uses: actions/upload-artifact@v3 From 497fee82a110f39dca98a7009bffc5a3710f9a5b Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Thu, 27 Apr 2023 16:21:05 +0200 Subject: [PATCH 6/6] reword fuzz README.md --- tests/fuzz/README.md | 56 +++++++++++++++++++++++--------------------- 1 file changed, 29 insertions(+), 27 deletions(-) diff --git a/tests/fuzz/README.md b/tests/fuzz/README.md index fa0438ef..87075a70 100644 --- a/tests/fuzz/README.md +++ b/tests/fuzz/README.md @@ -1,39 +1,41 @@ -# Fuzzing by way of OSS-Fuzz +# OSS-Fuzz integration -Fuzzing set up that is run by OSS-Fuzz. The relevant files in the OSS-Fuzz -repository is [here](https://github.com/google/oss-fuzz/tree/master/projects/markdown-it-py). +In principle, core Markdown parsing is designed to never except/crash on any input, +and so [fuzzing](https://en.wikipedia.org/wiki/Fuzzing) can be used to test this conformance. +This folder contains fuzzers which are principally run downstream as part of the infrastructure. -The fuzzers require the [Atheris](https://pypi.org/project/atheris/) package. 
-You need this package to run the fuzzers locally (i.e. non oss-fuzz). +Any file that matches `fuzz_*.py` in this repository will be built and run on OSS-Fuzz +(see <https://github.com/google/oss-fuzz/blob/master/projects/markdown-it-py/build.sh>). -## Building by way of OSS-Fuzz -The following steps will build the fuzzers using the OSS-Fuzz infrastructure: -``` -git clone https://github.com/google/oss-fuzz -cd oss-fuzz -python3 infra/helper.py build_fuzzers markdown-it-py +See <https://google.github.io/oss-fuzz/> for full details. -# The fuzzers are now placed in build/out/markdown-it-py -# To run the fuzz_markdown fuzzer: -python3 infra/helper.py run_fuzzer markdown-it-py fuzz_markdown -``` +## CI integration + +Fuzzing essentially runs forever, or until a crash is found, therefore it cannot be fully integrated into local continuous integration testing. +The workflow in `.github/workflows/fuzz.yml` though runs a brief fuzzing on code changed in a PR, +which can be used to provide early warning on code changes. + +## Reproducing crash failures -## Extending so fuzzers run on OSS-Fuzz -The build script on the OSS-Fuzz repository for markdown-it-py fuzzers is -here: https://github.com/google/oss-fuzz/blob/master/projects/markdown-it-py/build.sh +If OSS-Fuzz (or the CI workflow) identifies a crash, it will produce a "minimized testcase" file +(e.g. <https://oss-fuzz.com/testcase-detail/5424112454729728>). + +To reproduce this crash locally, the easiest way is to run the [tox](https://tox.wiki/) environment, provided in this repository, against the test file (see `tox.ini`): + +``` +tox -e fuzz path/to/testcase +``` -Any file that matches `fuzz_*.py` in this repository will be build and run on -OSS-Fuzz. Thus, to extend with a new fuzzer simply name it accordingly. +This idempotently sets up a local python environment with markdown-it-py (local dev) and [Atheris](https://pypi.org/project/atheris/) installed, +clones <https://github.com/google/oss-fuzz> into it, +and builds the fuzzers. +Then the testcase is run within this environment. 
-## Reproducing issues -In order to reproduce an issue reported by OSS-Fuzz, you need to: -1) Download the `Minimized Testcase` (which is a file or raw bytes) from the -detailed OSS-Fuzz reports. Example link: https://oss-fuzz.com/testcase-detail/5424112454729728 -2) Build the fuzzers as shown above -3) Use the command: +If you wish to simply run the full fuzzing process, +you can activate this environment, then run e.g.: ``` -python3 infra/helper.py reproduce markdown-it-py {FUZZER_NAME} {PATH_TO_MINIMIZED_TESTCASE} +python .tox/fuzz/oss-fuzz/infra/helper.py run_fuzzer markdown-it-py fuzz_markdown ``` For a more thorough guide on reproducing, see: https://google.github.io/oss-fuzz/advanced-topics/reproducing/