diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 00000000..8233962e --- /dev/null +++ b/.dockerignore @@ -0,0 +1,5 @@ +target_injections +target_bins +target_configs +tests +docs \ No newline at end of file diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000..355ffe0a --- /dev/null +++ b/.gitattributes @@ -0,0 +1,6 @@ +# Always checkout with LF +*.sh text eol=lf +*.txt text eol=lf + +# Always checkout with CRLF +*.bat text eol=crlf \ No newline at end of file diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 00000000..86b08292 --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,54 @@ +name: Test Lava + +on: + workflow_dispatch: + pull_request: + branches: + - master + - next + - feature* + - fix* + +jobs: + + build: + runs-on: ubuntu-22.04 + env: + LLVM_DIR: /usr/lib/llvm-11 + DEBIAN_FRONTEND: noninteractive + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Install all requirements + run: bash install.sh + + # For now I am pushing LAVA, but eventually this should be just a testing repository + build_container: + runs-on: panda-arc + steps: + - name: 'Login to Docker Registry' + uses: docker/login-action@v3 + with: + username: pandare + password: ${{secrets.ALL_PANDARE_DOCKERHUB}} + + - name: Checkout LAVA at current commit + uses: actions/checkout@v4 + + - name: Build Lava Docker image + uses: docker/build-push-action@v5 + with: + push: true + context: ${{ github.workspace }} + tags: | + pandare/lava:latest + + # - name: Update Docker Hub Description + # uses: peter-evans/dockerhub-description@v4 + # with: + # username: pandare + # password: ${{secrets.ALL_PANDARE_DOCKERHUB}} + # repository: pandare/lava + # short-description: ${{ github.event.repository.description }} diff --git a/.github/workflows/publish_docker.yml b/.github/workflows/publish_docker.yml new file mode 100644 index 00000000..454fbd41 --- /dev/null +++ 
b/.github/workflows/publish_docker.yml @@ -0,0 +1,48 @@ +name: Publish Lava Docker Container # Only for main lava repo, not forks + +on: + workflow_dispatch: + push: + branches: + - master + +jobs: + create_release: + if: github.repository == 'panda-re/lava' && github.ref == 'refs/heads/master' + runs-on: ubuntu-latest + outputs: + v-version: ${{ steps.version.outputs.v-version }} + steps: + - name: Get next version + uses: reecetech/version-increment@2023.10.2 + id: version + with: + release_branch: master + use_api: true + increment: patch + + build_stable: + needs: [create_release] + if: github.repository == 'panda-re/lava' && github.ref == 'refs/heads/master' + runs-on: panda-arc + steps: + - name: 'Login to Docker Registry' + uses: docker/login-action@v3 + with: + username: pandare + password: ${{secrets.ALL_PANDARE_DOCKERHUB}} + + - name: Checkout LAVA at current commit + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Build lava:latest + uses: docker/build-push-action@v5 + with: + context: ${{ github.workspace }} + push: true + tags: | + pandare/lava:latest + pandare/lava:${{ github.sha }} + pandare/lava:${{ needs.create_release.outputs.v-version }} diff --git a/.gitignore b/.gitignore index 5896f70d..87953435 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,9 @@ -# ignore IDE and existing lava build directory +# ignore IDE and any panda wheel/debian packages +.vscode .idea -lava +*.deb +*.whl +.env # this existed before .gdb_history diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index a05629a8..00000000 --- a/.gitmodules +++ /dev/null @@ -1,3 +0,0 @@ -[submodule "panda/src"] - path = panda/src - url = https://github.com/panda-re/panda.git diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 00000000..e1ab942f --- /dev/null +++ b/Dockerfile @@ -0,0 +1,64 @@ +ARG BASE_IMAGE="ubuntu:22.04" + +### BASE IMAGE +FROM $BASE_IMAGE AS base +ARG BASE_IMAGE + +ENV DEBIAN_FRONTEND=noninteractive +ENV 
LLVM_DIR=/usr/lib/llvm-11 +ENV PATH="/scripts:${PATH}" +ENV PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python + +# Copy dependencies lists into container. We copy them all and then do a mv because +# we need to transform base_image into a windows compatible filename which we can't +# do in a COPY command. +COPY ./dependencies/* /tmp +COPY ./requirements.txt /tmp +COPY ./tools/ /tools +COPY ./scripts/ /scripts + +RUN mv /tmp/$(echo "$BASE_IMAGE" | sed 's/:/_/g')_build.txt /tmp/build_dep.txt && \ + mv /tmp/$(echo "$BASE_IMAGE" | sed 's/:/_/g')_base.txt /tmp/base_dep.txt + +# Base image just needs runtime dependencies +RUN [ -e /tmp/base_dep.txt ] && \ + apt-get -qq update && \ + apt-get -qq install -y --no-install-recommends curl $(cat /tmp/base_dep.txt | grep -o '^[^#]*') && \ + apt-get clean + +# Finally: Install panda debian package, you need a version that has the Dwarf2 Plugin +RUN curl -LJO https://github.com/panda-re/panda/releases/download/v1.8.23/pandare_22.04.deb +RUN mv ./pandare_22.04.deb /tmp +RUN apt install -qq -y /tmp/pandare_22.04.deb +RUN pip install -r /tmp/requirements.txt + +### BUILD IMAGE - STAGE 2 +RUN [ -e /tmp/build_dep.txt ] && \ + apt-get -qq update && \ + apt-get install -y --no-install-recommends $(cat /tmp/build_dep.txt | grep -o '^[^#]*') && \ + apt-get clean + +RUN cd /tmp && \ + git clone https://github.com/capstone-engine/capstone/ -b v4 && \ + cd capstone/ && ./make.sh && make install && cd /tmp && \ + rm -rf /tmp/capstone && ldconfig + +#### Develop setup: panda built + pypanda installed (in develop mode) - Stage 3 +#### Essentially same as setup_container.sh +RUN cd /tools/btrace && ./compile.sh + +RUN rm -rf /tools/build +RUN mkdir -p /tools/build +RUN mkdir -p /tools/install + +RUN cmake -B"/tools/build" -H"/tools" -DCMAKE_INSTALL_PREFIX="/tools/install" +RUN make --no-print-directory -j4 install -C "/tools/build/lavaTool" +RUN make --no-print-directory -j4 install -C "/tools/build/fbi" + +# We need 32-bit support inside the 
container for now +RUN dpkg --add-architecture i386 && apt-get update && apt-get -y install zlib1g-dev:i386 gcc-multilib + +# RUN useradd volcana +# RUN chown -R volcana:volcana /tools/ +# RUN chown -R volcana:volcana /scripts/ +# USER volcana diff --git a/SETUP.md b/SETUP.md deleted file mode 100644 index 90e3cd1d..00000000 --- a/SETUP.md +++ /dev/null @@ -1,49 +0,0 @@ -# LAVA Installation Guide - -## Docker installation - sudo apt-get install docker.io - -## If that doesn’t work: - sudo apt-get update - sudo apt-get upgrade //optional - - sudo apt-key adv --keyserver hkp://ha.pool.sks-keyservers.net:80 \ - --recv-keys 58118E89F3A912897C070ADBF76221572C52609D - - touch /etc/apt/source.list.d/docker.list - - echo "deb https://apt.dockerproject.org/repo ubuntu-xenial main" | sudo tee - /etc/apt/sources.list.d/docker.list - - sudo apt-get update - - sudo apt-get install docker-engine - sudo service docker start -NB: Change the distribution version name accordingly - - -## Git and Python installation - sudo apt-get install git - sudo apt-get install python - sudo apt-get install python-pip - sudo pip install --upgrade pip - sudo pip install colorama - -## Grant docker usage for non-root - sudo usermod -a -G docker $USER - su - $USER - docker ps //test - -## Clone the repository - git clone git@bitbucket.org:moyix/lava.git - - or - - git clone https://$YOUR_USERNAME@bitbucket.org/moyix/lava.git - -## Install LAVA - cd lava - python setup.py - -## Try LAVA out - python init_project.py diff --git a/dependencies/README.md b/dependencies/README.md new file mode 100644 index 00000000..d565453e --- /dev/null +++ b/dependencies/README.md @@ -0,0 +1,7 @@ +This directory contains plaintext lists of build and runtime dependencies for LAVA on various architectures. +The files here are sourced by our Dockerfile as well as our installation scripts. +By consolidating dependencies into a single location, we're able to avoid things getting out of sync. 
+ +Files must be named `[base_image]_[base|build].txt` where `base_image` refers to the docker tag used (e.g., `ubuntu:20.04`). Build should describe build dependencies and base should describe runtime dependencies. + +Files can contain comments with `#` diff --git a/dependencies/ubuntu_20.04_base.txt b/dependencies/ubuntu_20.04_base.txt new file mode 100644 index 00000000..5e161047 --- /dev/null +++ b/dependencies/ubuntu_20.04_base.txt @@ -0,0 +1 @@ +# lava dependencies, needed to run LAVA diff --git a/dependencies/ubuntu_20.04_build.txt b/dependencies/ubuntu_20.04_build.txt new file mode 100644 index 00000000..04b0491e --- /dev/null +++ b/dependencies/ubuntu_20.04_build.txt @@ -0,0 +1,24 @@ +# lava dependencies, to compile LAVA + +# Based on original setup.py after panda is installed step +# https://installati.one/install-odb-ubuntu-20-04/?expand_article=1 +odb + +# https://installati.one/install-libodbc2-ubuntu-22-04/?expand_article=1 +libodbc2 + +# https://installati.one/install-libodb-pgsql-2.4-ubuntu-20-04/ +libodb-pgsql-dev + +# Brendan noticed these libraries were needed to compile FBI +libodb-pgsql-2.4 + + +# https://pypi.org/project/pyzmq/ +libzmq3-dev + +# libc6 needed for compiling btrace? + +# libjsoncpp needed for fbi json parsing? + +# I may need g++-10? 
\ No newline at end of file diff --git a/dependencies/ubuntu_22.04_base.txt b/dependencies/ubuntu_22.04_base.txt new file mode 100644 index 00000000..d2a04fa6 --- /dev/null +++ b/dependencies/ubuntu_22.04_base.txt @@ -0,0 +1,3 @@ +# lava dependencies, needed to run LAVA +python3-pip +libprotobuf-dev \ No newline at end of file diff --git a/dependencies/ubuntu_22.04_build.txt b/dependencies/ubuntu_22.04_build.txt new file mode 100644 index 00000000..db4d73de --- /dev/null +++ b/dependencies/ubuntu_22.04_build.txt @@ -0,0 +1,46 @@ +# lava dependencies, to compile LAVA + +# Initial list based on what Andrew Fasano thinks +bc +build-essential +clang-tools-11 +cmake +git +inotify-tools +jq +libclang-11-dev +libfdt-dev +libjsoncpp-dev + +libpq-dev +llvm-11-dev +postgresql +socat + +# Curl must work +ca-certificates + +# Based on original setup.py after panda is installed step +# https://installati.one/install-odb-ubuntu-20-04/?expand_article=1 +odb + +# https://installati.one/install-libodbc2-ubuntu-22-04/?expand_article=1 +libodbc2 + +# https://installati.one/install-libodb-pgsql-2.4-ubuntu-20-04/ +libodb-pgsql-dev +# Brendan noticed these libraries were needed to compile FBI +libodb-pgsql-2.4 + +# https://pypi.org/project/pyzmq/ +libzmq3-dev + +# libc6 needed for compiling btrace? + +# libjsoncpp needed for fbi json parsing? 
+ +# I need this for making LavaTool +g++-10 + +# Install dwarf dump, you need this for 64-bit bugs +dwarfdump \ No newline at end of file diff --git a/docker/Dockerfile b/docker/Dockerfile deleted file mode 100644 index 0fb62309..00000000 --- a/docker/Dockerfile +++ /dev/null @@ -1,123 +0,0 @@ -FROM debian:stretch -RUN echo "deb [check-valid-until=no] http://snapshot.debian.org/archive/debian/20220630T220025Z/ stretch main" > /etc/apt/sources.list -RUN echo "deb-src [check-valid-until=no] http://snapshot.debian.org/archive/debian/20220630T220025Z/ stretch main" >> /etc/apt/sources.list -RUN echo "deb [check-valid-until=no] http://snapshot.debian.org/archive/debian/20220630T220025Z/ stretch-updates main" >> /etc/apt/sources.list -RUN echo "deb-src [check-valid-until=no] http://snapshot.debian.org/archive/debian/20220630T220025Z/ stretch-updates main" >> /etc/apt/sources.list -RUN echo "deb [check-valid-until=no] http://snapshot.debian.org/archive/debian/20220630T220025Z/ stretch-backports main" >> /etc/apt/sources.list -RUN echo "deb-src [check-valid-until=no] http://snapshot.debian.org/archive/debian/20220630T220025Z/ stretch-backports main" >> /etc/apt/sources.list -RUN apt-get update && apt-get -y dist-upgrade && \ - apt-get remove -y libnettle6 libelf1 && \ - apt-get install -y --allow-downgrades zlib1g=1:1.2.8.dfsg-5 && \ - apt-get autoremove -y -RUN apt-get install -y sudo build-essential python wget cmake gdb gawk mlocate \ - vim libc++-dev g++-multilib g++ ninja-build \ - git jq bc python python-colorama zsh \ - python-pexpect python-psutil \ - python-numpy python-argparse python-pip python-dev libpq-dev \ - sudo gdb gawk zlib1g-dev autoconf libtool pkg-config locales \ - genisoimage postgresql-client-9.6 \ - libglib2.0-dev libpixman-1-dev groff-base libdwarf-dev libcapstone-dev wireshark-dev libelf-dev \ - flex bison -ENV release 3.6.2 -ENV llvm_version llvm-${release} - -RUN wget --no-check-certificate 
https://llvm.org/releases/$release/$llvm_version.src.tar.xz -RUN tar -xJf $llvm_version.src.tar.xz - -RUN mv $llvm_version.src $llvm_version -RUN cd $llvm_version - -WORKDIR /$llvm_version/tools -ENV clang_version cfe-$release -RUN wget --no-check-certificate https://llvm.org/releases/$release/$clang_version.src.tar.xz -RUN tar -xJf $clang_version.src.tar.xz -RUN mv $clang_version.src clang - -WORKDIR /$llvm_version/tools/clang/tools -RUN wget --no-check-certificate https://llvm.org/releases/$release/clang-tools-extra-$release.src.tar.xz -RUN tar -xJf clang-tools-extra-$release.src.tar.xz -RUN mv clang-tools-extra-$release.src extra - -WORKDIR /$llvm_version -#RUN ./configure --enable-optimized --disable-assertions --enable-targets=x86 --enable-shared --enable-pic --host=i486-linux-gnu --build=i486-linux-gnu -RUN mkdir /$llvm_version/build -RUN mkdir /$llvm_version/Release -WORKDIR /$llvm_version/build -RUN cmake ../ -DCMAKE_INSTALL_PREFIX=/$llvm_version/Release -DLLVM_TARGETS_TO_BUILD=X86 \ - -DBUILD_SHARED_LIBS=true -DLLVM_ENABLE_ASSERTIONS=false -DLLVM_ENABLE_RTTI=true \ -#-DLLVM_BUILD_32_BITS=true \ - -DLLVM_ENABLE_PIC=true -DCMAKE_BUILD_TYPE=Release -DLLVM_TARGET_ARCH=i486-linux-gnu \ - -DCMAKE_CXX_FLAGS="-D_GLIBCXX_USE_CXX11_ABI=0" \ - -G "Ninja" -RUN ninja install - -WORKDIR / -RUN wget http://codesynthesis.com/download/odb/2.4/odb_2.4.0-1_amd64.deb -RUN dpkg -i odb_2.4.0-1_amd64.deb -#RUN wget http://codesynthesis.com/download/odb/2.4/odb-2.4.0.tar.gz -RUN wget http://codesynthesis.com/download/odb/2.4/libodb-2.4.0.tar.gz -RUN tar xf libodb-2.4.0.tar.gz -WORKDIR /libodb-2.4.0 -RUN CXXFLAGS='-D_GLIBCXX_USE_CXX11_ABI=0' ./configure --enable-shared && make -j $(nproc) && make install -WORKDIR / -RUN wget http://codesynthesis.com/download/odb/2.4/libodb-pgsql-2.4.0.tar.gz -RUN tar xf libodb-pgsql-2.4.0.tar.gz -WORKDIR /libodb-pgsql-2.4.0 -RUN CXXFLAGS='-D_GLIBCXX_USE_CXX11_ABI=0' ./configure --enable-shared && make -j $(nproc) && make install - -RUN echo 
"/usr/local/lib" > /etc/ld.so.conf.d/usr-local-lib.conf -RUN ldconfig - -RUN pip install --upgrade -v pip -i https://pypi.python.org/simple/ -RUN pip install subprocess32 lockfile sqlalchemy==1.0.14 -i https://pypi.python.org/simple - -RUN pip install pyyaml pycparser psycopg2 -RUN updatedb - -RUN echo "LLVM_DIR=/$llvm_version/Release/share/llvm/cmake" >> /etc/environment -RUN echo "LD_LIBRARY_PATH=/$llvm_version/Release/lib" >> /etc/environment -RUN echo "LIBRARY_PATH=/usr/local/lib" >> /etc/environment -RUN echo "PATH=$PATH:/$llvm_version/Release/bin" >> /etc/environment - -RUN apt-get install -y sudo gdb gawk zlib1g-dev autoconf libtool pkg-config - -# Set locale to C.UTF-8 instead of us_EN.UTF-8 -RUN apt-get install -y locales -RUN locale-gen C.UTF-8 -RUN locale -a -RUN update-locale LANG=C.UTF-8 - -# Install libjsoncpp -WORKDIR / -RUN wget https://github.com/open-source-parsers/jsoncpp/archive/refs/tags/1.7.4.tar.gz -RUN tar -xzf 1.7.4.tar.gz -WORKDIR jsoncpp-1.7.4 -RUN cmake -DCMAKE_CXX_FLAGS="-D_GLIBCXX_USE_CXX11_ABI=0" . 
&& make -j && make install && ldconfig - -# Install protobuf -WORKDIR / -RUN git clone https://github.com/protocolbuffers/protobuf.git -WORKDIR protobuf -RUN git checkout 3.6.x -RUN ./autogen.sh && CXXFLAGS='-D_GLIBCXX_USE_CXX11_ABI=0' ./configure && make -j $(nproc) && make install && ldconfig -WORKDIR / -RUN wget https://github.com/protobuf-c/protobuf-c/releases/download/v1.3.1/protobuf-c-1.3.1.tar.gz && tar -xzf protobuf-c-1.3.1.tar.gz -WORKDIR protobuf-c-1.3.1 -RUN CXXFLAGS='-D_GLIBCXX_USE_CXX11_ABI=0' ./configure && make -j $(nproc) && make install && ldconfig - -# Setup PANDA -RUN apt-get install -y libglib2.0-dev libpixman-1-dev groff-base libdwarf-dev libcapstone-dev wireshark-dev libelf-dev flex bison -WORKDIR / -RUN wget https://releases.llvm.org/3.3/llvm-3.3.src.tar.gz -RUN tar -xzf llvm-3.3.src.tar.gz -RUN wget https://releases.llvm.org/3.3/cfe-3.3.src.tar.gz -RUN tar -xzf cfe-3.3.src.tar.gz && mv cfe-3.3.src /llvm-3.3.src/tools/clang -WORKDIR /llvm-3.3.src -RUN CXXFLAGS='-D_GLIBCXX_USE_CXX11_ABI=0' ./configure --prefix=/llvm-3.3-install && make -j $(nproc) && make install - -# Setting up remote Postgres(host) && PANDA Aux Packages -RUN apt-get install -y genisoimage postgresql-client-9.6 -# pg_hba.conf: -# host all all 172.17.0.1/16 md5 -# postgresql.conf: -# listen_addresses = 'localhost, 172.17.0.1' diff --git a/docker/debug/Dockerfile b/docker/debug/Dockerfile deleted file mode 100644 index a9d4a5a6..00000000 --- a/docker/debug/Dockerfile +++ /dev/null @@ -1,17 +0,0 @@ -FROM lava32 - -RUN apt-get update -RUN apt-get -y install locales - -RUN echo en_US.UTF-8 UTF-8 | tee /etc/locale.gen -RUN locale-gen - -ENV LANG en_US.UTF-8 -ENV LANGUAGE en_US -ENV LC_ALL en_US.utf8 - -RUN apt-get -y install sudo gdb vim emacs exuberant-ctags hexedit - -WORKDIR /$llvm_version -RUN ./configure --disable-optimized --enable-assertions --enable-targets=x86 --enable-shared --enable-pic --host=i486-linux-gnu --build=i486-linux-gnu -RUN REQUIRES_RTTI=1 make -j $(nproc) 
diff --git a/docker/rebuild-notes b/docker/rebuild-notes deleted file mode 100644 index d493d1e2..00000000 --- a/docker/rebuild-notes +++ /dev/null @@ -1,2 +0,0 @@ -cd docker -docker build . -t lava32 diff --git a/docker/requirements.txt b/docker/requirements.txt deleted file mode 100644 index caef5edc..00000000 --- a/docker/requirements.txt +++ /dev/null @@ -1,13 +0,0 @@ -SQLAlchemy==1.0.14 -zmq==0.0.0 -pyzmq==15.2.0 -psycopg2==2.6.1 -lockfile==0.10.2 -tabulate==0.7.5 -pexpect==3.2 -psutil==5.6.6 -ipython==2.3.0 -colorama==0.3.2 -numpy==1.8.2 -argparse==1.2.1 -subprocess32==3.2.6 diff --git a/docker/sources.list b/docker/sources.list deleted file mode 100644 index c4c9a64e..00000000 --- a/docker/sources.list +++ /dev/null @@ -1,35 +0,0 @@ -# - -# deb cdrom:[Debian GNU/Linux 7.8.0 _Wheezy_ - Official i386 NETINST Binary-1 20150110-13:31]/ wheezy main - -#deb cdrom:[Debian GNU/Linux 7.8.0 _Wheezy_ - Official i386 NETINST Binary-1 20150110-13:31]/ wheezy main - -# The below repositories are now archived and unavailable -#deb http://ftp.us.debian.org/debian/ wheezy main -#deb-src http://ftp.us.debian.org/debian/ wheezy main -# -#deb http://security.debian.org/ wheezy/updates main -## Line commented out by installer because it failed to verify: -##deb-src http://security.debian.org/ wheezy/updates main -# -## wheezy-updates, previously known as 'volatile' -#deb http://ftp.us.debian.org/debian/ wheezy-updates main -#deb-src http://ftp.us.debian.org/debian/ wheezy-updates main -# -#deb http://ftp.us.debian.org/debian/ wheezy-backports main -#deb-src http://ftp.us.debian.org/debian/ wheezy-backports main - -# Updated snapshot repositories (of all repos above) -deb http://snapshot.debian.org/archive/debian/20211106T025313Z/ wheezy main -deb-src http://snapshot.debian.org/archive/debian/20211106T025313Z/ wheezy main - -deb http://snapshot.debian.org/archive/debian-security/20211106T025313Z/ wheezy/updates main -# Line commented out by installer because it failed to verify: 
-#deb-src http://snapshot.debian.org/archive/debian-security/20211106T025313Z/ wheezy/updates main - -# wheezy-updates, previously known as 'volatile' -deb http://snapshot.debian.org/archive/debian/20211106T025313Z/ wheezy-updates main -deb-src http://snapshot.debian.org/archive/debian/20211106T025313Z/ wheezy-updates main - -deb http://snapshot.debian.org/archive/debian/20211106T025313Z/ wheezy-backports main -deb-src http://snapshot.debian.org/archive/debian/20211106T025313Z/ wheezy-backports main diff --git a/docs/how-to-lava.md b/docs/how-to-lava.md index 6da33a80..1b4c059a 100644 --- a/docs/how-to-lava.md +++ b/docs/how-to-lava.md @@ -76,7 +76,7 @@ Fortunately, we've created a tool to help with this process- `lavaInitTool`. the preprocessed c files. 2.5.5 In each directory with your source code, run -`/llvm-3.6.2/Release/bin/clang-apply-replacements .` +`/usr/lib/llvm-11/bin/clang-apply-replacements .` 2.5.6 If you search through your code for `={0};` you should see variables that were previously unitialized now being initialized to null. 
diff --git a/init-host.py b/init-host.py index 87ea5f91..38630ff9 100755 --- a/init-host.py +++ b/init-host.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python3 # import argparse import os @@ -18,13 +18,8 @@ from colorama import Fore from colorama import Style -QCOW_URL = "http://panda.moyix.net/~moyix/wheezy_panda2.qcow2" -# if moyix server is down, this image will also work -# QCOW_URL = "https://panda.re/qcows/linux/debian/7.3/x86/debian_7.3_x86.qcow" -QCOW_FILE_NAME = "wheezy_panda2.qcow2" TAR_URL = "ftp://ftp.astron.com/pub/file/file-5.22.tar.gz" LAVA_DIR = dirname(abspath(sys.argv[0])) -os.chdir(LAVA_DIR) def progress(msg): @@ -57,20 +52,7 @@ def run(cmd): def main(): - # try to import lava.mak as a config file if not exit - try: - def_lines = (line.strip() for line in open("lava.mak", "r") - if not line.strip().startswith("#") - and line.strip() != "") - def_lines = (line.split(":=") for line in def_lines) - def_lines = ((line[0].strip(), line[1].strip()) for line in def_lines) - LAVA_CONFS = dict(def_lines) - PANDA_BUILD_DIR = LAVA_CONFS["PANDA_BUILD_DIR"] - PANDA_BUILD_DIR = expandvars(PANDA_BUILD_DIR) - print("PANDA_BUILD_DIR Used {}".format(PANDA_BUILD_DIR)) - except Exception: - error("Make sure to have properly configured lava.mak \ - generated by setup.py") + # parser = argparse.ArgumentParser(description='Setup LAVA') # parser.add_argument('-s', '--skip_docker_build', action='store_true', # default = False, @@ -86,25 +68,18 @@ def main(): # summon tar and qcow files if not isfile(join(TAR_DIR, basename(TAR_URL))): - progress("Downloading %s".format(basename(TAR_URL))) + progress("Downloading {}".format(basename(TAR_URL))) os.chdir(TAR_DIR) run(["wget", TAR_URL]) os.chdir(LAVA_DIR) else: progress("Found existing target_bins/{}".format(basename(TAR_URL))) - if not isfile(join(LAVA_DIR, basename(QCOW_URL))): - progress("Downloading {}".format(basename(QCOW_URL))) - run(["wget", "--no-check-certificate", QCOW_URL, "-O", QCOW_FILE_NAME]) - else: 
- progress("Found existing {}".format(basename(QCOW_URL))) - if not isfile(join(LAVA_DIR, "host.json")): progress("Building host.json") # Build host.json json_configs = {} - json_configs["qemu"] = join(join(PANDA_BUILD_DIR, "i386-softmmu"), - "qemu-system-i386") + json_configs["qemu"] = "panda-system-i386" json_configs["qcow_dir"] = LAVA_DIR json_configs["output_dir"] = join(LAVA_DIR, "target_injections") json_configs["config_dir"] = join(LAVA_DIR, "target_configs") @@ -115,19 +90,11 @@ def main(): out_json = join(LAVA_DIR, "host.json") with open(out_json, 'w') as f: - f.write(json.dumps(json_configs)) + f.write(json.dumps(json_configs, indent=4)) else: progress("Found existing host.json") - # progress("(re)building the fbi") - # os.chdir(join(LAVA_DIR, "tools", "build", "fbi")) - # run(["make", "install", "-j4"]) - - # progress("(re)building lavaTool") - # os.chdir(join(LAVA_DIR, "tools", "build", "lavaTool")) - # run(["./compile-on-docker.sh"]) - - progress("Sucessful! Now run:\n $ scripts/lava.sh -ak file") + progress("Successful! 
Now run:\n $ scripts/lava.sh -ak file") return 0 diff --git a/install.sh b/install.sh old mode 100644 new mode 100755 index ebab981c..fafa1c81 --- a/install.sh +++ b/install.sh @@ -1,9 +1,55 @@ #!/bin/bash -sudo add-apt-repository ppa:phulin/panda -sudo cp /etc/apt/sources.list /etc/apt/sources.list~ -sudo sed -Ei 's/^# deb-src /deb-src /' /etc/apt/sources.list -sudo apt-get update -sudo apt-get install python-pip git protobuf-compiler protobuf-c-compiler libprotobuf-c0-dev libprotoc-dev python-protobuf libelf-dev libcapstone-dev libdwarf-dev python-pycparser llvm-3.3 clang-3.3 libc++-dev libwiretap-dev libwireshark-dev odb -sudo apt-get build-dep qemu -sudo pip install --upgrade pip -sudo pip install colorama \ No newline at end of file + +set -ex + +# shellcheck disable=SC2034 +sudo="" +if [ $EUID -ne 0 ]; then + SUDO=sudo +fi + +progress() { + echo + echo -e "\e[32m[lava_install]\e[0m \e[1m$1\e[0m" +} + +progress "Updates complete" + +# This was originally in the docs/setup.md, I removed things starting with 'python-' as that should be installed via pip +# shellcheck disable=SC2046 +# libc6 needed for compiling btrace +# libjsoncpp needed for fbi json parsing +# This fixes installing psycopg2 +# https://stackoverflow.com/questions/11618898/pg-config-executable-not-found + +# Dependencies are for a major version, but the filenames include minor versions +# So take our major version, find the first match in dependencies directory and run with it. +# This will give us "./panda/dependencies/ubuntu:20.04" where ubuntu:20.04_build.txt or 20.04_base.txt exists +version=$(lsb_release -r | awk '{print $2}' | awk -F'.' 
'{print $1}') +# shellcheck disable=SC2086 +dep_base=$(find ./dependencies/ubuntu_${version}.* -print -quit | sed -e "s/_build\.txt\|_base\.txt//") + +if [ -e "${dep_base}"_build.txt ] || [ -e "${dep_base}"_base.txt ]; then + echo "Found dependency file(s) at ${dep_base}*.txt" + # shellcheck disable=SC2046 + # shellcheck disable=SC2086 + DEBIAN_FRONTEND=noninteractive $SUDO apt-get -y install --no-install-recommends $(cat ${dep_base}*.txt | grep -o '^[^#]*') +else + echo "Unsupported Ubuntu version: $version. Create a list of build dependencies in ${dep_base}_{base,build}.txt and try again." + exit 1 +fi + +curl -LJO https://github.com/panda-re/panda/releases/download/v1.8.23/pandare_22.04.deb +mv *.deb /tmp +$SUDO apt-get -y install /tmp/pandare_22.04.deb +rm /tmp/*.deb + +progress "Installed build dependencies" + +pip3 install --upgrade pip +pip3 install -r requirements.txt +progress "Installed Python requirements" + +bash ./setup_container.sh + +progress "Installed LAVA" diff --git a/panda/setup.sh b/panda/setup.sh deleted file mode 100755 index 32214d37..00000000 --- a/panda/setup.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash - -LLVM_DIR="/llvm-3.3-install" -PANDA_DIR="$( realpath $( dirname "${BASH_SOURCE[0]}" ) )" - -mkdir -p ${PANDA_DIR}/build -pushd ${PANDA_DIR}/build -QEMU_CFLAGS='-D_GLIBCXX_USE_CXX11_ABI=0' CXXFLAGS='-D_GLIBCXX_USE_CXX11_ABI=0' "../src/configure" \ - --target-list=i386-softmmu \ - --prefix="${PANDA_DIR}/install" \ - --cc=gcc-6 --cxx=g++-6 \ - --enable-llvm --with-llvm="${LLVM_DIR}" \ - --python=python2 - -make -j ${PANDA_NPROC:-$(nproc || sysctl -n hw.ncpu)} -popd diff --git a/panda/src b/panda/src deleted file mode 160000 index ff882598..00000000 --- a/panda/src +++ /dev/null @@ -1 +0,0 @@ -Subproject commit ff88259806d731ee6ee687e2de4a534ab27bd4d7 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..d2675377 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,9 @@ +colorama +db +ipython +lockfile +numpy 
+pandare +PyYAML +SQLAlchemy +tabulate diff --git a/scripts/add_queries.sh b/scripts/add_queries.sh index 4e0284be..f062ef67 100755 --- a/scripts/add_queries.sh +++ b/scripts/add_queries.sh @@ -38,7 +38,7 @@ USAGE() { } set -e # Exit on error -#set -x # Debug mode +set -x # Debug mode if [ $# -lt 1 ]; then USAGE $0 @@ -86,8 +86,8 @@ progress "queries" 0 "Configuring..." mkdir -p lava-install configure_file=${configure_cmd%% *} if [ -e "$configure_file" ]; then - CC=/llvm-3.6.2/Release/bin/clang \ - CXX=/llvm-3.6.2/Release/bin/clang++ \ + CC=$llvm/bin/clang \ + CXX=$llvm/bin/clang++ \ CFLAGS="-O0 -m32 -DHAVE_CONFIG_H -g -gdwarf-2 -fno-stack-protector -D_FORTIFY_SOURCE=0 -I. -I.. -I../include -I./src/" \ $configure_cmd --prefix=$(pwd)/lava-install fi @@ -103,8 +103,8 @@ for i in ${MAKES[@]}; do IFS=' ' read -ra ARGS <<< $i echo "$lava/tools/btrace/sw-btrace ${ARGS[@]}" - CC=/llvm-3.6.2/Release/bin/clang \ - CXX=/llvm-3.6.2/Release/bin/clang++ \ + CC=$llvm/bin/clang \ + CXX=$llvm/bin/clang++ \ CFLAGS="-O0 -m32 -DHAVE_CONFIG_H -g -gdwarf-2 -fno-stack-protector -D_FORTIFY_SOURCE=0 -I. -I.. -I../include -I./src/" \ $lava/tools/btrace/sw-btrace ${ARGS[@]} IFS='&&' @@ -116,14 +116,10 @@ progress "queries" 0 "Installing..." bash -c $install -# figure out where llvm is -llvm_src=$(grep LLVM_SRC_PATH $lava/tools/lavaTool/config.mak | cut -d' ' -f3) - - progress "queries" 0 "Creating compile_commands.json..." # Delete any pre-existing compile commands.json (could be in archive by mistake) rm -f compile_commands.json -$lava/tools/btrace/sw-btrace-to-compiledb $llvm_src/Release/lib/clang/3.6.2/include +$lava/tools/btrace/sw-btrace-to-compiledb $llvm/lib/clang/11/include if [ -e "$directory/$name/extra_compile_commands.json" ]; then sed -i '$d' compile_commands.json echo "," >> compile_commands.json @@ -134,7 +130,7 @@ git commit -m 'Add compile_commands.json.' cd .. 
-c_files=$(python $lava/tools/lavaTool/get_c_files.py $source) +c_files=$($python $lava/tools/lavaTool/get_c_files.py $source) c_dirs=$(for i in $c_files; do dirname $i; done | sort | uniq) progress "queries" 0 "Copying include files..." @@ -155,7 +151,7 @@ done #progress "queries" 0 "Initialize variables..." #for i in $c_files; do -# $lava/src_clang/build/lavaTool -action=init \ +# /src_clang/build/lavaTool -action=init \ # -p="$source/compile_commands.json" \ # -src-prefix=$(readlink -f "$source") \ # $i @@ -171,7 +167,7 @@ fninstr=$directory/$name/fninstr echo "Creating fninstr [$fninstr]" echo -e "\twith command: \"python $lava/scripts/fninstr.py -d -o $fninstr $fnfiles\"" -python $lava/scripts/fninstr.py -d -o $fninstr $fnfiles +$python $lava/scripts/fninstr.py -d -o $fninstr $fnfiles if [[ ! -z "$df_fn_blacklist" ]]; then cmd=$(echo "sed -i /${df_fn_blacklist}/d $fninstr") @@ -213,13 +209,13 @@ fi # Do we need to explicitly apply replacements in the root source directory # This causes clang-apply-replacements to segfault when run a 2nd time #pushd "$directory/$name/$source" -#$llvm_src/Release/bin/clang-apply-replacements . +#/usr/lib/llvm-11/bin/clang-apply-replacements . #popd for i in $c_dirs; do echo "Applying replacements to $i" pushd $i - $llvm_src/Release/bin/clang-apply-replacements . + $llvm/bin/clang-apply-replacements . popd done diff --git a/scripts/bug_mining.py b/scripts/bug_mining.py index 0b634fc7..5ae010a5 100644 --- a/scripts/bug_mining.py +++ b/scripts/bug_mining.py @@ -1,28 +1,23 @@ -''' +""" This script assumes you have already done src-to-src transformation with lavaTool to add taint and attack point queries to a program, AND managed to json project file. -Second arg is input file you want to run, under panda, to get taint info. -''' - -from __future__ import print_function +Second arg is an input file you want to run, under panda, to get taint info. 
+""" import os import sys import time -import pipes import shlex import shutil -import subprocess32 +import subprocess from colorama import Fore from colorama import Style from errno import EEXIST -from os.path import join -from os.path import abspath from os.path import dirname from os.path import basename @@ -32,14 +27,49 @@ from lava import LavaDatabase from vars import parse_vars +from os.path import abspath, join +from pandare import Panda +from pandare.extras import dwarfdump + +host_json = abspath(sys.argv[1]) +project_name = sys.argv[2] + +project = parse_vars(host_json, project_name) +qemu_path = project['qemu'] +panda = Panda(generic=qemu_path.split('-')[-1], + expect_prompt=project['expect_prompt']) debug = True qemu_use_rr = False start_time = 0 -version="2.0.0" -curtail=0 +version = "2.0.0" +curtail = 0 + +installdir = None +command_args = None + + +# Replace create_recording in first link +# https://github.com/panda-re/panda/blob/dev/panda/scripts/run_guest.py#L151-L189 +# https://github.com/panda-re/panda/blob/dev/panda/python/core/pandare/panda.py#L2595-L2645 +@panda.queue_blocking +def create_recording(): + global command_args + global installdir + print("args", command_args) + print("install dir", installdir) + guest_command = subprocess.list2cmdline(command_args) + # Technically the first two steps of record_cmd + # but running executable ONLY works with absolute paths + panda.revert_sync('root') + panda.copy_to_guest(installdir, absolute_paths=True) + + # Pass in None for snap_name since I already did the revert_sync already + panda.record_cmd(guest_command=guest_command, snap_name=None) + panda.stop_run() + def tick(): global start_time @@ -66,56 +96,28 @@ def progress(msg): if len(sys.argv) < 4: - print ("Bug mining script version {}".format(version)) + print("Bug mining script version {}".format(version)) print("Usage: python bug_mining.py host.json project_name inputfile", file=sys.stderr) sys.exit(1) tick() -host_json = abspath(sys.argv[1]) 
-project_name = sys.argv[2] - -project = parse_vars(host_json, project_name) - input_file = abspath(project["config_dir"] + "/" + sys.argv[3]) input_file_base = os.path.basename(input_file) print("bug_mining.py %s %s" % (project_name, input_file)) if len(sys.argv) > 4: - #global curtail + # global curtail curtail = int(sys.argv[4]) -qemu_path = project['qemu'] -qemu_build_dir = dirname(dirname(abspath(qemu_path))) -src_path = None - -print ("{}".format(join(qemu_build_dir, 'config-host.mak'))) - -with open(join(qemu_build_dir, 'config-host.mak')) as config_host: - for line in config_host: - var, sep, value = line.strip().partition('=') - if var == 'SRC_PATH': - src_path = value - break -assert src_path -panda_scripts_dir = join(src_path, 'panda', 'scripts') -sys.path.append(panda_scripts_dir) - -from run_guest import create_recording - -chaff = project.get('chaff', False) - -panda_os_string = project.get('panda_os_string', - 'linux-32-debian:3.2.0-4-686-pae') - lavadir = dirname(dirname(abspath(sys.argv[0]))) progress("Entering {}".format(project['output_dir'])) os.chdir(os.path.join(project['output_dir'])) -tar_files = subprocess32.check_output(['tar', 'tf', project['tarfile']]) +tar_files = subprocess.check_output(['tar', 'tf', project['tarfile']]).decode('utf-8') sourcedir = tar_files.splitlines()[0].split(os.path.sep)[0] sourcedir = abspath(sourcedir) @@ -123,15 +125,13 @@ def progress(msg): # e.g. 
file-5.22-true.iso installdir = join(sourcedir, 'lava-install') input_file_guest = join(installdir, input_file_base) -isoname = '{}-{}.iso'.format(sourcedir, input_file_base) -command_args = shlex.split(project['command'].format( - install_dir=pipes.quote(installdir), - input_file=input_file_guest)) +command_args = shlex.split( + project['command'].format( + install_dir=shlex.quote(installdir), + input_file=input_file_guest)) shutil.copy(input_file, installdir) -create_recording(qemu_path, project['qcow'], project['snapshot'], - command_args, installdir, isoname, - project["expect_prompt"], "ide1-cd0", rr=qemu_use_rr) +panda.run() try: os.mkdir('inputs') @@ -149,67 +149,84 @@ def progress(msg): progress("Starting first and only replay, tainting on file open...") # process name - if command_args[0].startswith('LD_PRELOAD'): + cmdpath = command_args[1] proc_name = basename(command_args[1]) else: + cmdpath = command_args[0] proc_name = basename(command_args[0]) -pandalog = "{}/queries-{}.plog".format(project['output_dir'], - os.path.basename(isoname)) +binpath = os.path.join(installdir, "bin", proc_name) +if not os.path.exists(binpath): + binpath = os.path.join(installdir, "lib", proc_name) + if not os.path.exists(binpath): + binpath = os.path.join(installdir, proc_name) -print("pandalog = [%s] " % pandalog) +pandalog = "{}/queries-{}.plog".format(project['output_dir'], input_file_base) +pandalog_json = "{}/queries-{}.json".format(project['output_dir'], input_file_base) -panda_args = { - 'pri': {}, - 'pri_dwarf': { - 'proc': proc_name, - 'g_debugpath': installdir, - 'h_debugpath': installdir - }, - 'pri_taint': { - 'hypercall': True, - 'chaff': chaff - }, - 'taint2': {'no_tp': True}, - 'tainted_branch': {}, - 'file_taint': { - 'pos': True, - 'cache_process_details_on_basic_block': True, - } -} +print("pandalog = [%s] " % pandalog) +dwarf_cmd = ["dwarfdump", "-dil", cmdpath] +dwarfout = subprocess.check_output(dwarf_cmd) +dwarfdump.parse_dwarfdump(dwarfout, binpath) + 
+# Based on this example: +# https://github.com/panda-re/panda/blob/dev/panda/python/examples/file_taint/file_taint.py +panda.set_pandalog(pandalog) +panda.load_plugin("pri") +panda.load_plugin("taint2", + args={ + 'no_tp': True + }) +panda.load_plugin("tainted_branch") + +panda.load_plugin("dwarf2", + args={ + 'proc': proc_name, + 'g_debugpath': installdir, + 'h_debugpath': installdir + }) +# pri_taint is almost same as Zhenghao's hypercall +# Chaffx64 branch says these are needed? +# if panda.arch != 'i386': +# panda.load_plugin('hypercall') +# panda.load_plugin('stackprob') + + +print(project) +#print('use_stdin' in project) +#print(project['use_stdin']) if 'use_stdin' in project and project['use_stdin']: - panda_args['file_taint']['first_instr'] = 1 - panda_args['file_taint']['use_stdin'] = proc_name + print("Using stdin for taint analysis") + panda.load_plugin("file_taint", + args={ + 'filename': input_file_guest, + 'pos': True, + 'cache_process_details_on_basic_block': True, + 'enable_taint_on_open': True, + 'verbose': True + }) else: - panda_args['file_taint']['enable_taint_on_open'] = True - -qemu_args = [ - project['qemu'], '-replay', isoname, - '-pandalog', pandalog, '-os', panda_os_string -] - -for plugin, plugin_args in panda_args.iteritems(): - qemu_args.append('-panda') - arg_string = ",".join(["{}={}".format(arg, val) - for arg, val in plugin_args.iteritems()]) - qemu_args.append('{}{}{}'.format(plugin, ':' - if arg_string else '', arg_string)) - -# Use -panda-plugin-arg to account for commas and colons in filename. 
-qemu_args.extend(['-panda-arg', 'file_taint:filename=' + input_file_guest]) - -dprint("qemu args: [{}]".format(subprocess32.list2cmdline(qemu_args))) -sys.stdout.flush() -try: - subprocess32.check_call(qemu_args, stderr=subprocess32.STDOUT) -except subprocess32.CalledProcessError: - if qemu_use_rr: - qemu_args = ['rr', 'record', project['qemu'], '-replay', isoname] - subprocess32.check_call(qemu_args) - else: - raise + print("Using open for taint analysis") + panda.load_plugin("file_taint", + args={ + 'filename': input_file_guest, + 'pos': True, + 'cache_process_details_on_basic_block': True, + 'first_instr': 1, + 'use_stdin': proc_name, + 'verbose': True + }) + +panda.load_plugin("pri_taint", args={ + 'hypercall': True, + 'chaff': False +}) + +# Default name is 'recording' +# https://github.com/panda-re/panda/blob/dev/panda/python/core/pandare/panda.py#L2595 +panda.run_replay("recording") replay_time = tock() print("taint analysis complete %.2f seconds" % replay_time) @@ -217,50 +234,45 @@ def progress(msg): tick() -progress("Trying to create database {}...".format(project['name'])) -createdb_args = ['createdb', '-U', 'postgres', '-h', 'database', project['db']] -createdb_result = subprocess32.call(createdb_args, - stdout=sys.stdout, stderr=sys.stderr) - -print() -if createdb_result == 0: # Created new DB; now populate - progress("Database created. 
Initializing...") - # psql_args = ['psql', '-U', 'postgres', '-d', project['db'], - # '-f', join(join(lavadir, 'include'), 'lava.sql')] - psql_args = ['psql', '-U', 'postgres', '-h', 'database', '-d', project['db'], - '-f', join(join(lavadir, 'fbi'), 'lava.sql')] - dprint("psql invocation: [%s]" % (" ".join(psql_args))) - subprocess32.check_call(psql_args, stdout=sys.stdout, stderr=sys.stderr) -else: - progress("Database already exists.") - -print() +# I attempted to upgrade the version, but panda had trouble including something +# for now, we can use the python implementation, although it is slower +# https://github.com/protocolbuffers/protobuf/releases/tag/v21.0 +# https://stackoverflow.com/questions/52040428/how-to-update-protobuf-runtime-library +os.environ['PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION'] = 'python' progress("Calling the FBI on queries.plog...") +convert_json_args = ['python3', '-m', 'pandare.plog_reader', pandalog] +print("panda log JSON invocation: [%s] > %s" % (subprocess.list2cmdline(convert_json_args), pandalog_json)) +try: + with open(pandalog_json, 'wb') as fd: + subprocess.check_call(convert_json_args, stdout=fd, stderr=sys.stderr) +except subprocess.CalledProcessError as e: + print("The script to convert the panda log into JSON has failed") + raise e + # fbi_args = [join(lavadir, 'fbi', 'fbi'), # project_file, pandalog, input_file_base] fbi_args = [join(lavadir, 'tools', 'install', 'bin', 'fbi'), host_json, - project_name, pandalog, input_file_base] + project_name, pandalog_json, input_file_base] # Command line curtial argument takes priority, otherwise use project specific one -#global curtail -if curtail !=0 : +# global curtail +if curtail != 0: fbi_args.append(str(curtail)) elif "curtail" in project: fbi_args.append(str(project.get("curtail", 0))) -dprint("fbi invocation: [%s]" % (subprocess32.list2cmdline(fbi_args))) +dprint("fbi invocation: [%s]" % (subprocess.list2cmdline(fbi_args))) sys.stdout.flush() try: - 
subprocess32.check_call(fbi_args, stdout=sys.stdout, stderr=sys.stderr) -except subprocess32.CalledProcessError as e: - print("FBI Failed. Possible causes: \n"+ - "\tNo DUAs found because taint analysis failed: \n" - "\t\t Ensure PANDA 'saw open of file we want to taint'\n" - "\t\t Make sure target has debug symbols (version2): No 'failed DWARF loading' messages\n" - "\tFBI crashed (bad arguments, config, or other untested code)") + subprocess.check_call(fbi_args, stdout=sys.stdout, stderr=sys.stderr) +except subprocess.CalledProcessError as e: + print("FBI Failed. Possible causes: \n" + + "\tNo DUAs found because taint analysis failed: \n" + "\t\t Ensure PANDA 'saw open of file we want to taint'\n" + "\t\t Make sure target has debug symbols (version2): No 'failed DWARF loading' messages\n" + "\tFBI crashed (bad arguments, config, or other untested code)") raise e - print() progress("Found Bugs, Injectable!!") @@ -275,7 +287,6 @@ def progress(msg): n = db.session.query(Bug).filter(Bug.type == i).count() print("%d\t%d\t%s" % (n, i, Bug.type_strings[i])) - print("total dua:", db.session.query(Dua).count()) print("total atp:", db.session.query(AttackPoint).count()) print("total bug:", db.session.query(Bug).count()) diff --git a/scripts/competition.py b/scripts/competition.py index eae37aeb..5e08f745 100755 --- a/scripts/competition.py +++ b/scripts/competition.py @@ -1,40 +1,27 @@ -#!/usr/bin/python +#!/usr/bin/env python3 import argparse -import atexit import datetime -import json -import lockfile import os -import pipes -import re -import shlex +import random import shutil -import signal -import string import stat -import subprocess32 -import sys -import time -import random - -from math import sqrt -from os.path import basename, dirname, join, abspath, exists +import subprocess +from os.path import basename, join -from vars import parse_vars -from lava import LavaDatabase, Bug, Build, DuaBytes, Run, \ - run_cmd, run_cmd_notimeout, mutfile, inject_bugs, LavaPaths, \ 
- validate_bugs, run_modified_program, unfuzzed_input_for_bug, \ - fuzzed_input_for_bug, get_trigger_line, AttackPoint, Bug, \ +from lava import LavaDatabase, run_cmd, run_cmd_notimeout, inject_bugs, LavaPaths, \ + validate_bugs, fuzzed_input_for_bug, AttackPoint, Bug, \ get_allowed_bugtype_num, limit_atp_reuse +from vars import parse_vars # from pycparser.diversifier.diversify import diversify -#from process_compile_commands import get_c_files +# from process_compile_commands import get_c_files -version="2.0.0" +version = "2.0.0" RETRY_COUNT = 0 + # Build both scripts - in a seperate fn for testing def run_builds(scripts): for script in scripts: @@ -45,10 +32,11 @@ def run_builds(scripts): raise RuntimeError("Could not build {}".format(script)) print("Built with command {}".format(script)) + def random_choice(options, probs): # Select from options with probabilities from prob sum_probs = sum(probs) - norm_probs = [float(x)/sum_probs for x in probs] + norm_probs = [float(x) / sum_probs for x in probs] r = random.uniform(0, 1) for idx, prb in enumerate(norm_probs): if r < prb: return options[idx] @@ -64,8 +52,8 @@ def random_choice(options, probs): # because otherwise the db might give us all the same dua def competition_bugs_and_non_bugs(limit, db, allowed_bugtypes, buglist): - #XXX This function is prtty gross, definitely needs a rewrite - max_duplicates_per_line = 50 # Max duplicates we *try* to inject per line. After validation, we filter down to ~1 per line + # XXX This function is prtty gross, definitely needs a rewrite + max_duplicates_per_line = 50 # Max duplicates we *try* to inject per line. 
After validation, we filter down to ~1 per line bugs_and_non_bugs = [] dfl_fileline = {} afl_fileline = {} @@ -81,7 +69,7 @@ def parse(item): """ if not (item.type in allowed_bugtypes): - #print("skipping type {} not in {}".format(item.type, allowed_bugtypes)) + # print("skipping type {} not in {}".format(item.type, allowed_bugtypes)) return True dfl = (item.trigger_lval.loc_filename, item.trigger_lval.loc_begin_line) afl = (item.atp.loc_filename, item.atp.loc_begin_line, item.atp.loc_begin_column) @@ -89,21 +77,23 @@ def parse(item): if not (dfl in dfl_fileline.keys()): dfl_fileline[dfl] = 0 if not (afl in afl_fileline.keys()): afl_fileline[afl] = 0 - if (dfl_fileline[dfl] > max_duplicates_per_line): - #print "skipping dfl %s" % (str(dfl)) + if dfl_fileline[dfl] > max_duplicates_per_line: + # print "skipping dfl %s" % (str(dfl)) return True - if (afl_fileline[afl] > max_duplicates_per_line): - #print "skipping afl %s" % (str(afl)) + if afl_fileline[afl] > max_duplicates_per_line: + # print "skipping afl %s" % (str(afl)) return True if fake: - print "non-bug", + print("non-bug") else: - print "bug ", - print 'id={} dua_fl={} atp_fl={} dua_ast={} type={}'.format(item.id, str(dfl), str(afl), str(item.trigger_lval.ast_name), Bug.type_strings[item.type]) + print("bug ") + print('id={} dua_fl={} atp_fl={} dua_ast={} type={}'.format(item.id, str(dfl), str(afl), + str(item.trigger_lval.ast_name), + Bug.type_strings[item.type])) dfl_fileline[dfl] += 1 afl_fileline[afl] += 1 bugs_and_non_bugs.append(item) - if (len(bugs_and_non_bugs) >= limit): + if len(bugs_and_non_bugs) >= limit: print("Abort bug-selection because we already found {} bugs to inject".format(limit)) return False return True @@ -116,45 +106,50 @@ def parse(item): atp_types = [AttackPoint.FUNCTION_CALL, AttackPoint.POINTER_WRITE] # Get limit bugs at each ATP - #atp_item_lists = db.uninjected_random_by_atp(fake, atp_types=atp_types, allowed_bugtypes=allowed_bugtypes, atp_lim=limit) + # atp_item_lists = 
db.uninjected_random_by_atp(fake, atp_types=atp_types, allowed_bugtypes=allowed_bugtypes, atp_lim=limit) # Returns list of lists where each sublist corresponds to the same atp: [[ATP1_bug1, ATP1_bug2], [ATP2_bug1], [ATP3_bug1, ATP3_bug2]] - atp_item_lists = db.uninjected_random_by_atp_bugtype(fake, atp_types=atp_types, allowed_bugtypes=allowed_bugtypes, atp_lim=limit) + atp_item_lists = db.uninjected_random_by_atp_bugtype(fake, atp_types=atp_types, + allowed_bugtypes=allowed_bugtypes, atp_lim=limit) # Returns dict of list of lists where each dict is a bugtype and within each, each sublist corresponds to the same atp: [[ATP1_bug1, ATP1_bug2], [ATP2_bug1], [ATP3_bug1, ATP3_bug2]] while True: for selected_bugtype in allowed_bugtypes: - atp_item_lists[selected_bugtype] = [x for x in atp_item_lists[selected_bugtype] if len(x)] # Delete any empty lists + atp_item_lists[selected_bugtype] = [x for x in atp_item_lists[selected_bugtype] if + len(x)] # Delete any empty lists if sum([len(x) for x in atp_item_lists.values()]) == 0: - print("Abort bug-selection because we've selected all {} potential bugs we have (Failed to find all {} requested bugs)".format(len(bugs_and_non_bugs), limit)) + print( + "Abort bug-selection because we've selected all {} potential bugs we have (Failed to find all {} requested bugs)".format( + len(bugs_and_non_bugs), limit)) break # Randomly select a sublist from atp_item_lists (none will be empty) - #weight by bugtype - + # weight by bugtype - # Of the allowed bugtypes, the ratio will be normalized. + # Of the allowed bugtypes, the ratio will be normalized. # As this is now, we'll pick REL_WRITES (multiduas) more often than others because they work less frequently # Ratios for RET_BUFFER and PRINTF_LEAK are just guesses bug_ratios = {Bug.REL_WRITE: 200, Bug.PTR_ADD: 15, Bug.RET_BUFFER: 15, Bug.PRINTF_LEAK: 15} for x in allowed_bugtypes: if x not in bug_ratios: - assert("Bug type {} not in bug_ratios. 
Fix me!".format(Bug.type_strings[this_bugtype])) + assert ("Bug type {} not in bug_ratios. Fix me!".format(Bug.type_strings[this_bugtype])) allowed_bug_ratios = [bug_ratios[x] for x in allowed_bugtypes] this_bugtype = random_choice(allowed_bugtypes, allowed_bug_ratios) - #print("Selected bugtype {}".format(Bug.type_strings[this_bugtype])) + # print("Selected bugtype {}".format(Bug.type_strings[this_bugtype])) this_bugtype_atp_item_lists = atp_item_lists[this_bugtype] if len(this_bugtype_atp_item_lists) == 0: # TODO: intelligently select a different bugype in this case allowed_bugtypes.remove(this_bugtype) - print("Warning: tried to select a bug of type {} but none available".format(Bug.type_strings[this_bugtype])) - assert(len(allowed_bugtypes) >0), "No bugs available" + print("Warning: tried to select a bug of type {} but none available".format( + Bug.type_strings[this_bugtype])) + assert (len(allowed_bugtypes) > 0), "No bugs available" continue - atp_item_idx = random.randint(0, len(this_bugtype_atp_item_lists)-1) - item = this_bugtype_atp_item_lists[atp_item_idx].pop() # Pop the first bug from that bug_list (Sublist will be sorted randomly) + atp_item_idx = random.randint(0, len(this_bugtype_atp_item_lists) - 1) + item = this_bugtype_atp_item_lists[ + atp_item_idx].pop() # Pop the first bug from that bug_list (Sublist will be sorted randomly) """ # TODO: fix this manual libjpeg hack. 
Blacklist bugs here by strings in their dua/extra_duas @@ -176,7 +171,7 @@ def parse(item): # End of libjpeg hack """ - abort |= not parse(item) # Once parse returns true, break + abort |= not parse(item) # Once parse returns true, break if abort: break else: @@ -190,7 +185,7 @@ def parse(item): afl = (item.atp.loc_filename, item.atp.loc_begin_line, item.atp.loc_begin_column) if afl not in afls.keys(): afls[afl] = 0 - afls[afl] +=1 + afls[afl] += 1 print("{} potential bugs were selected across {} ATPs:".format(len(bugs_and_non_bugs), len(afls))) for bugtype in allowed_bugtypes: @@ -200,35 +195,36 @@ def parse(item): for atp, count in afls.items(): print("\t{}\t bugs at {}".format(count, atp)) - return [b.id for b in bugs_and_non_bugs] + def main(): parser = argparse.ArgumentParser(prog="competition.py", description='Inject and test LAVA bugs.') - parser.add_argument('host_json', help = 'Host JSON file') - parser.add_argument('project', help = 'Project name') + parser.add_argument('host_json', help='Host JSON file') + parser.add_argument('project', help='Project name') parser.add_argument('-m', '--many', action="store", default=-1, - help = 'Inject this many bugs and this many non-bugs (chosen randomly)') + help='Inject this many bugs and this many non-bugs (chosen randomly)') parser.add_argument('-n', '--minYield', action="store", default=-1, - help = 'Require at least this many real bugs') + help='Require at least this many real bugs') parser.add_argument('-l', '--buglist', action="store", default=False, - help = 'Inject this list of bugs') + help='Inject this list of bugs') parser.add_argument('-e', '--exitCode', action="store", default=0, type=int, - help = ('Expected exit code when program exits without crashing. Default 0')) - #parser.add_argument('-i', '--diversify', action="store_true", default=False, - #help = ('Diversify source code. 
Default false.')) - parser.add_argument('-c', '--chaff', action="store_true", default=False, # TODO chaf and unvalided bugs aren't always the same thing - help = ('Leave unvalidated bugs in the binary')) + help='Expected exit code when program exits without crashing. Default 0') + # parser.add_argument('-i', '--diversify', action="store_true", default=False, + # help = ('Diversify source code. Default false.')) + parser.add_argument('-c', '--chaff', action="store_true", default=False, + # TODO chaf and unvalided bugs aren't always the same thing + help='Leave unvalidated bugs in the binary') parser.add_argument('-t', '--bugtypes', action="store", default="rel_write", - help = ('bug types to inject')) + help='bug types to inject') parser.add_argument('--version', action="version", version="%(prog)s {}".format(version)) args = parser.parse_args() global project project = parse_vars(args.host_json, args.project) - dataflow = project.get("dataflow", False) # Default to false + dataflow = project.get("dataflow", False) # Default to false allowed_bugtypes = get_allowed_bugtype_num(args) @@ -265,15 +261,16 @@ def main(): ############### if args.buglist: - print ("bug_list incoming %s" % (str(args.buglist))) - bug_list = competition_bugs_and_non_bugs(len(args.buglist), db, allowed_bugtypes, eval(args.buglist)) # XXX EVAL WHY + print("bug_list incoming %s" % (str(args.buglist))) + bug_list = competition_bugs_and_non_bugs(len(args.buglist), db, allowed_bugtypes, + eval(args.buglist)) # XXX EVAL WHY elif args.many: bug_list = competition_bugs_and_non_bugs(int(args.many), db, allowed_bugtypes, None) else: print("Fatal error: no bugs specified") raise RuntimeError - assert len(bug_list) # Found no bugs + assert len(bug_list) # Found no bugs print('bug_list (len={}):'.format(len(bug_list))) bug_list_str = ','.join([str(bug_id) for bug_id in bug_list]) @@ -285,21 +282,21 @@ def main(): real_bug_list = [] # add bugs to the source code and check that we can still compile - (build, 
input_files, bug_solutions) = inject_bugs(bug_list, db, lp, args.host_json, \ - project, args, False, dataflow=dataflow, competition=True, - validated=False, lavatoolseed=lavatoolseed) - assert build is not None # build is None when injection fails. Could block here to allow for manual patches + (build, input_files, bug_solutions) = inject_bugs(bug_list, db, lp, args.host_json, + project, args, False, dataflow=dataflow, competition=True, + validated=False, lavatoolseed=lavatoolseed) + assert build is not None # build is None when injection fails. Could block here to allow for manual patches # Test if the injected bugs cause approperiate crashes and that our competition infrastructure parses the crashes correctly - real_bug_list = validate_bugs(bug_list, db, lp, project, input_files, build, \ - args, False, competition=True, bug_solutions=bug_solutions) + real_bug_list = validate_bugs(bug_list, db, lp, project, input_files, build, + args, False, competition=True, bug_solutions=bug_solutions) if len(real_bug_list) < int(args.minYield): print("\n\nXXX Yield too low after injection -- Require at least {} bugs for" - " competition, only have {}".format(args.minYield, len(real_bug_list))) + " competition, only have {}".format(args.minYield, len(real_bug_list))) raise RuntimeError("Failure") - print "\n\n Yield acceptable: {}".format(len(real_bug_list)) + print("\n\n Yield acceptable: {}".format(len(real_bug_list))) # TODO- the rebuild process may invalidate a previously validated bug because the trigger will change # Need to find a way to pass data between lavaTool and here so we can reinject *identical* bugs as before @@ -318,31 +315,31 @@ def main(): real_bug_list = limit_atp_reuse(real_bugs) # TODO retry a few times if we fail this test - if bug_list != real_bug_list: # Only reinject if our bug list has changed + if bug_list != real_bug_list: # Only reinject if our bug list has changed if len(real_bug_list) < int(args.minYield): print("\n\nXXX Yield too low after 
reducing duplicates -- Require at least {} bugs for \ competition, only have {}".format(args.minYield, len(real_bug_list))) raise RuntimeError("Failure") - (build, input_files, bug_solutions) = inject_bugs(real_bug_list, db, lp, args.host_json, \ - project, args, False, dataflow=dataflow, competition=True, validated=True, - lavatoolseed=lavatoolseed) + (build, input_files, bug_solutions) = inject_bugs(real_bug_list, db, lp, args.host_json, + project, args, False, dataflow=dataflow, + competition=True, validated=True, + lavatoolseed=lavatoolseed) - assert build is not None # build is None if injection fails + assert build is not None # build is None if injection fails ############### ## Now build our corpora directory with the buggy source dir, binaries in lava-install-public, ## lava-install-internal, and scripts to rebuild the binaries ############### - corpus_dir = join(compdir, "corpora") - subprocess32.check_call(["mkdir", "-p", corpus_dir]) + subprocess.check_call(["mkdir", "-p", corpus_dir]) # original bugs src dir # directory for this corpus corpname = "lava-corpus-" + ((datetime.datetime.now()).strftime("%Y-%m-%d-%H-%M-%S")) - corpdir = join(corpus_dir,corpname) - subprocess32.check_call(["mkdir", corpdir]) + corpdir = join(corpus_dir, corpname) + subprocess.check_call(["mkdir", corpdir]) lava_bd = join(lp.bugs_parent, lp.source_root) @@ -375,13 +372,13 @@ def main(): popd """.format( bugs_build=bd, - make_clean = project["clean"] if "clean" in project.keys() else "", + make_clean=project["clean"] if "clean" in project.keys() else "", configure=project['configure'] if "configure" in project.keys() else "", - log_make = log_make, - internal_builddir = internal_builddir, - install = project['install'].format(install_dir=lava_installdir), - post_install = project['post_install'] if 'post_install' in project.keys() else "", - )) + log_make=log_make, + internal_builddir=internal_builddir, + install=project['install'].format(install_dir=lava_installdir), + 
post_install=project['post_install'] if 'post_install' in project.keys() else "", + )) run_builds([log_build_sh]) # diversify @@ -398,7 +395,7 @@ def main(): # ' '.join([ # 'gcc', '-E', '-std=gnu99', # '-I.', '-I..', - # '-I/llvm-3.6.2/Release/lib/clang/3.6.2/include', + # '-I/usr/lib/llvm-11/lib/clang/11/include', # '-o', # '{}.pre'.format(c_file), # c_file])) @@ -411,7 +408,7 @@ def main(): print(o) if rv == 0: print('build succeeded') - subprocess32.check_call(project['install'], cwd=lp.bugs_build, shell=True) + subprocess.check_call(project['install'], cwd=lp.bugs_build, shell=True) if 'post_install' in project: check_call(project['post_install'], cwd=lp.bugs_build, shell=True) else: @@ -437,7 +434,7 @@ def main(): # subdir with trigger inputs inputsdir = join(corpdir, "inputs") - subprocess32.check_call(["mkdir", inputsdir]) + subprocess.check_call(["mkdir", inputsdir]) # subdir with src -- note we can't create it or copytree will fail! srcdir = join(corpdir, "src") # copy src @@ -454,23 +451,23 @@ def main(): predictions.append((prediction, fi, bug.type)) bug_ids.append(bug.id) - print "Answer key:" + print("Answer key:") with open(join(corpdir, "ans"), "w") as ans: for (prediction, fi, bugtype) in predictions: - print "ANSWER [%s] [%s] [%s]" % (prediction, fi, Bug.type_strings[bugtype]) + print("ANSWER [%s] [%s] [%s]" % (prediction, fi, Bug.type_strings[bugtype])) ans.write("%s %s %s\n" % (prediction, fi, Bug.type_strings[bugtype])) with open(join(corpdir, "add_bugs.sql"), "w") as f: f.write("/* This file will add all the generated lava_id values to the DB, you must update binary_id */\n") f.write("\set binary_id 0\n") for bug_id in bug_ids: - f.write("insert into \"bug\" (\"lava_id\", \"binary\") VALUES (%d, :binary_id); \n" % (bug_id)) + f.write("insert into \"bug\" (\"lava_id\", \"binary\") VALUES (%d, :binary_id); \n" % bug_id) # clean up srcdir before tar os.chdir(srcdir) try: # Unconfigure - subprocess32.check_call(["make", "distclean"]) + 
subprocess.check_call(["make", "distclean"]) except: pass @@ -486,16 +483,16 @@ def main(): os.remove(fname) # build source tar - #tarball = join(srcdir + ".tgz") - #os.chdir(corpdir) - #cmd = "/bin/tar czvf " + tarball + " src" - #subprocess32.check_call(cmd.split()) - #print "created corpus tarball " + tarball + "\n"; + # tarball = join(srcdir + ".tgz") + # os.chdir(corpdir) + # cmd = "/bin/tar czvf " + tarball + " src" + # subprocess.check_call(cmd.split()) + # print "created corpus tarball " + tarball + "\n"; - #lp.bugs_install = join(corpdir,"lava-install") # Change to be in our corpdir + # lp.bugs_install = join(corpdir,"lava-install") # Change to be in our corpdir # Save the commands we use into files so we can rerun later - public_build_sh = join(corpdir, "public_build.sh") # Simple + public_build_sh = join(corpdir, "public_build.sh") # Simple public_builddir = join(corpdir, "lava-install-public") lava_installdir = join(bd, "lava-install") with open(public_build_sh, "w") as build: @@ -515,13 +512,13 @@ def main(): popd """.format( bugs_build=bd, - make_clean = project["clean"] if "clean" in project.keys() else "", + make_clean=project["clean"] if "clean" in project.keys() else "", configure=project['configure'] if "configure" in project.keys() else "", - make = project['make'], - public_builddir = public_builddir, - install = project['install'].format(install_dir=lava_installdir), + make=project['make'], + public_builddir=public_builddir, + install=project['install'].format(install_dir=lava_installdir), post_install=project['post_install'] if "post_install" in project.keys() else "" - )) + )) trigger_all_crashes = join(corpdir, "trigger_crashes.sh") with open(trigger_all_crashes, "w") as build: @@ -558,25 +555,27 @@ def main(): fi fi fi -done""".format(command = project['command'].format(**{"install_dir": "./lava-install-internal", "input_file": "$fname"}), # This syntax is weird but only thing that works? 
- corpdir = corpdir, - librarydir = join("./lava-install-internal", "lib"), - librarydir2 = join("./lava-install-public", "lib"), - command2 = project['command'].format(**{"install_dir": "./lava-install-public", "input_file": "$fname"}), # This syntax is weird but only thing that works? - inputdir = "./inputs/*-fuzzed-*" - )) +done""".format(command=project['command'].format(**{"install_dir": "./lava-install-internal", "input_file": "$fname"}), + # This syntax is weird but only thing that works? + corpdir=corpdir, + librarydir=join("./lava-install-internal", "lib"), + librarydir2=join("./lava-install-public", "lib"), + command2=project['command'].format(**{"install_dir": "./lava-install-public", "input_file": "$fname"}), + # This syntax is weird but only thing that works? + inputdir="./inputs/*-fuzzed-*" + )) os.chmod(trigger_all_crashes, (stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR | stat.S_IROTH | stat.S_IXOTH)) # Build a version to ship in src run_builds([log_build_sh, public_build_sh]) print("Injected {} bugs".format(len(real_bug_list))) print("Counting how many crashes competition infrastructure identifies...") - run_cmd(trigger_all_crashes, cwd=corpdir) # Prints about segfaults + run_cmd(trigger_all_crashes, cwd=corpdir) # Prints about segfaults (rv, outp) = run_cmd("wc -l {}".format(join(corpdir, "validated_bugs.txt"))) if rv != 0: raise RuntimeError("Validated bugs file does not exist. 
Something went wrong") - (a,b) = outp[0].split() + (a, b) = outp[0].split() n = int(a) print("\tCompetition infrastructure found: %d of %d injected bugs" % (n, len(real_bug_list))) diff --git a/scripts/competition.sh b/scripts/competition.sh index 0a8756bf..b81061ac 100755 --- a/scripts/competition.sh +++ b/scripts/competition.sh @@ -4,7 +4,6 @@ # Json file required params # # lava: directory of lava repository -# pandahost: what remote host to run panda on trap '' PIPE set -e # Exit on error @@ -103,7 +102,7 @@ mkdir -p $logs lf="$logs/competition.log" progress "competition" 1 "Starting -- logging to $lf" truncate "$lf" -run_remote "$testinghost" "$python $scripts/competition.py -m $num_bugs -n $min_yield $bug_list -e $exit_code $diversify $skipinject --bugtypes=$bugtypes $usechaff $hostjson $project_name" "$lf" +run_remote "$buildhost" "$python $scripts/competition.py -m $num_bugs -n $min_yield $bug_list -e $exit_code $diversify $skipinject --bugtypes=$bugtypes $usechaff $hostjson $project_name" "$lf" progress "competition" 1 "Everything finished." 
tail -n3 $lf diff --git a/scripts/competition_autotools.py b/scripts/competition_autotools.py index d54556d7..93ea7b23 100644 --- a/scripts/competition_autotools.py +++ b/scripts/competition_autotools.py @@ -3,6 +3,8 @@ #### competition.py is not making this assumption and instead just copying out the lava-install dir ##### +from os.path import join + build_sh = join(corpdir, "build.sh") with open(build_sh, "w") as build: build.write("""#!/bin/bash @@ -16,11 +18,11 @@ {install} popd """.format(configure=project['configure'], - bugs_install=lp.bugs_install, - bugs_build=bd, - make=project['make'], - install=project['install'].format(install_dir=join(corpdir, 'lava-install')), - outdir=join(corpdir, "lava-install"))) + bugs_install=lp.bugs_install, + bugs_build=bd, + make=project['make'], + install=project['install'].format(install_dir=join(corpdir, 'lava-install')), + outdir=join(corpdir, "lava-install"))) log_build_sh = join(corpdir, "log_build.sh") makes = project['make'].split('&&') @@ -49,14 +51,14 @@ popd """.format(configure=project['configure'], - bugs_install = lp.bugs_install, - bugs_build=bd, - log_make = log_make, - install_int = project['install'].format(install_dir=internal_builddir), - intsall_pub = project['install'].format(install_dir=public_builddir), - internal_builddir = internal_builddir, - public_builddir = public_builddir - )) + bugs_install=lp.bugs_install, + bugs_build=bd, + log_make=log_make, + install_int=project['install'].format(install_dir=internal_builddir), + intsall_pub=project['install'].format(install_dir=public_builddir), + internal_builddir=internal_builddir, + public_builddir=public_builddir + )) trigger_all_crashes = join(corpdir, "trigger_crashes.sh") with open(trigger_all_crashes, "w") as build: @@ -69,8 +71,10 @@ done popd - """.format(command = project['command'].format(**{"install_dir": join(corpdir, "lava-install-internal"), "input_file": "$fname"}), # This syntax is weird but only thing that works? 
- corpdir = corpdir, - librarydir = join(corpdir, "lava-install-internal", "lib"), - inputdir = join(corpdir, "inputs") - )) + """.format(command=project['command'].format( + **{"install_dir": join(corpdir, "lava-install-internal"), "input_file": "$fname"}), + # This syntax is weird but only thing that works? + corpdir=corpdir, + librarydir=join(corpdir, "lava-install-internal", "lib"), + inputdir=join(corpdir, "inputs") + )) diff --git a/scripts/composite.py b/scripts/composite.py index fabcb1b4..5899f2af 100644 --- a/scripts/composite.py +++ b/scripts/composite.py @@ -2,6 +2,7 @@ from sqlalchemy.types import TypeEngine from sqlalchemy.orm import composite + class Composite(object): def __init__(self, *args): arg_idx = 0 diff --git a/scripts/db_spelunk.py b/scripts/db_spelunk.py index 7fd5fbca..a0ffd156 100644 --- a/scripts/db_spelunk.py +++ b/scripts/db_spelunk.py @@ -6,27 +6,26 @@ from db import * - - - -tcns = [1,10,100,1000] -#tcns = [1,4,16,64,256,1024,4096] +tcns = [1, 10, 100, 1000] +# tcns = [1,4,16,64,256,1024,4096] livs = tcns INFINITY = 100000000000000 + # returns true iff i[0] <= x < i[1] def interval_check(x, i): if (i[0] <= x) and (x < i[1]): return True return False + def get_interval(i, partition): - if i==0: - return (0,partition[i]) + if i == 0: + return 0, partition[i] elif i == len(partition): - return (partition[i-1], INFINITY) - return (partition[i-1], partition[i]) + return partition[i - 1], INFINITY + return partition[i - 1], partition[i] def spelunk(json_filename, counts, totals): @@ -40,7 +39,7 @@ def spelunk(json_filename, counts, totals): # write max_liveness & max_tcn to two different files, # one for when we succeed instantiating a segfault ("-sf") # and one for when we don't succeed ("-nf") - (head,tail) = os.path.split(json_filename) + (head, tail) = os.path.split(json_filename) fs = open("%s/%s-res-sf" % (project['directory'], tail), "w") ff = open("%s/%s-res-nf" % (project['directory'], tail), "w") # keep track of unique duas 
involved in working buffer overflow @@ -54,12 +53,13 @@ def spelunk(json_filename, counts, totals): bug_id = buglist[0] (dua_id, atp_id, inj) = bugs[bug_id] (filename_id, line, lval_id, insertionpoint, file_offset, lval_taint, inputfile_id, \ - max_tcn, max_card, max_liveness, dua_icount, dua_scount, instr) = duas[dua_id] - if (exitcode == -11 or exitcode == -6): + max_tcn, max_card, max_liveness, dua_icount, dua_scount, instr) = duas[dua_id] + if exitcode == -11 or exitcode == -6: fs.write("%d %d\n" % (max_liveness, max_tcn)) # high liveness yet we were able to trigger a segfault? Weird if max_liveness > 100 or max_tcn > 100: - print "run=%d bug=%d dua=%d is weird -- max_tcn=%d max_liveness=%d" % (run_id, bug_id, dua_id, max_tcn, max_liveness) + print("run=%d bug=%d dua=%d is weird -- max_tcn=%d max_liveness=%d" % ( + run_id, bug_id, dua_id, max_tcn, max_liveness)) bo_duas.add(dua_id) bo_atps.add(atp_id) bo_srcfiles.add(filename_id) @@ -74,19 +74,19 @@ def spelunk(json_filename, counts, totals): assert (exitcode2 == 0) fs.close() ff.close() - print "%s -- %d unique srcfiles involved in a validated bug" % (project['name'],len(bo_srcfiles)) - print "%s -- %d unique duas involved in a validated bug" % (project['name'],len(bo_duas)) - print "%s -- %d unique atps involved in a validated bug" % (project['name'],len(bo_atps)) + print("%s -- %d unique srcfiles involved in a validated bug" % (project['name'], len(bo_srcfiles))) + print("%s -- %d unique duas involved in a validated bug" % (project['name'], len(bo_duas))) + print("%s -- %d unique atps involved in a validated bug" % (project['name'], len(bo_atps))) max_tcns = {} max_lvns = {} max_crds = {} - for i in range(1+len(tcns)): + for i in range(1 + len(tcns)): if not (i in counts): counts[i] = {} totals[i] = {} tcn_interval = get_interval(i, tcns) - for j in range(1+len(livs)): + for j in range(1 + len(livs)): # for all runs with max_liveness in liv_interval and max_tcn in tcn_interval # collect counts by exit code 
if not (j in counts[i]): @@ -95,7 +95,7 @@ def spelunk(json_filename, counts, totals): # for all runs with max_liveness in liv_interval and max_tcn in tcn_interval # collect counts by exit code liv_interval = get_interval(j, livs) - n=0 + n = 0 for run_id in runs.keys(): (build_id, fuzz, exitcode, output_lines, success) = runs[run_id] (buglist, binpath, compiles) = builds[build_id] @@ -104,7 +104,7 @@ def spelunk(json_filename, counts, totals): bug_id = buglist[0] (dua_id, atp_id, inj) = bugs[bug_id] (filename_id, line, lval_id, insertionpoint, file_offset, lval_taint, inputfile_id, \ - max_tcn, max_card, max_liveness, dua_icount, dua_scount, instr) = duas[dua_id] + max_tcn, max_card, max_liveness, dua_icount, dua_scount, instr) = duas[dua_id] if fuzz: if (interval_check(max_liveness, liv_interval)) and (interval_check(max_tcn, tcn_interval)): if not (exitcode in counts[i][j]): @@ -114,48 +114,44 @@ def spelunk(json_filename, counts, totals): totals[i][j] += 1 - - counts = {} totals = {} for json_filename in (sys.argv[1:]): - print "\nspelunk [%s]\n" % json_filename + print("\nspelunk [%s]\n" % json_filename) spelunk(json_filename, counts, totals) - table = [] -for i in range(1+len(tcns)): +for i in range(1 + len(tcns)): tcn_interval = get_interval(i, tcns) row = [] if tcn_interval[1] == INFINITY: row.append("tcn=[%d,+inf]" % tcn_interval[0]) else: - row.append("tcn=[%d,%d)" % (tcn_interval[0],tcn_interval[1])) - for j in range(1+len(livs)): + row.append("tcn=[%d,%d)" % (tcn_interval[0], tcn_interval[1])) + for j in range(1 + len(livs)): liv_interval = get_interval(j, livs) ys = "y=u" - if (totals[i][j] > 0): + if totals[i][j] > 0: nsf = 0 if -11 in counts[i][j]: nsf = counts[i][j][-11] if -6 in counts[i][j]: - nsf += counts[i][j][-6] + nsf += counts[i][j][-6] y = (float(nsf)) / totals[i][j] ys = "y=%.3f" % y - cell = "n=%d %7s" % (totals[i][j],ys) + cell = "n=%d %7s" % (totals[i][j], ys) row.append(cell) table.append(row) - headers = [] -for j in 
range(1+len(livs)): +for j in range(1 + len(livs)): liv_interval = get_interval(j, livs) if liv_interval[1] == INFINITY: headers.append("liv=[%d..+inf]" % liv_interval[0]) else: headers.append("liv=[%d..%d)" % (liv_interval[0], liv_interval[1])) -#headers = ["liv=[%d..%d)" % l for l in livs] +# headers = ["liv=[%d..%d)" % l for l in livs] -print tabulate(table, headers, tablefmt="grid") +print(tabulate(table, headers, tablefmt="grid")) diff --git a/scripts/demo.py b/scripts/demo.py index 86b19a3e..c97a05cc 100644 --- a/scripts/demo.py +++ b/scripts/demo.py @@ -5,7 +5,7 @@ import json import sys import curses -import subprocess32 as sb +import subprocess as sb from os.path import basename, join from random import random, randrange @@ -15,12 +15,15 @@ log = open("/tmp/lava-demo-{}.log".format(getpass.getuser()), "w") log_ind = 0 + + def logp(msg): global log_ind log.write("%d: %s\n" % (log_ind, msg)) log.flush() log_ind += 1 + start_time = time.time() project = json.load(open(sys.argv[1])) @@ -41,6 +44,7 @@ def logp(msg): target_name, basename(project['inputs'][0]))) logp(plog) + # returns when this file exists on this host def wait_for_file(filename): while True: @@ -48,6 +52,7 @@ def wait_for_file(filename): return time.sleep(0.1) + # returns true if pattern is in file else false def find_in_file(pattern, filename): with open(filename) as f: @@ -59,6 +64,7 @@ def find_in_file(pattern, filename): return True return res is not None + # returns either None # or a list of matches def find_in_file_extract(pattern, filename): @@ -73,35 +79,42 @@ def find_in_file_extract(pattern, filename): res.append(foo.groups()[0]) return res + # check for pattern in hostfile and return true if its there def check_for(pattern, hostfile): return find_in_file(pattern, hostfile) + def wait_for(pattern, hostfile): while True: if check_for(pattern, hostfile): return time.sleep(0.05) + # extracts last def extract_float(pattern, hostfile): assert check_for(pattern, hostfile) res = 
find_in_file_extract(pattern, hostfile) return float(res[-1]) + def extract_int(pattern, hostfile): res = find_in_file_extract(pattern, hostfile) return int(res[-1]) + def addstr(lock, mon, r, c, s, *args): with lock: for i, line in enumerate(s.splitlines()): mon.addstr(r + i, c, line, *args) mon.refresh() + def addstr_bold(lock, mon, r, c, s): addstr(lock, mon, r, c, s, curses.A_BOLD) + def smoke(origin_r, origin_c, current_smoke): result = [] for i, (digit, dr, dc) in enumerate(current_smoke): @@ -113,6 +126,7 @@ def smoke(origin_r, origin_c, current_smoke): result.append((str(randrange(0, 2)), origin_r, origin_c)) return result + volcano_str = """\ _ / \\ @@ -124,6 +138,8 @@ def smoke(origin_r, origin_c, current_smoke): / / / \ \\ / / / / \\ """ + + def volcano(lock, mon, done_event): r, c = 19, 60 addstr_bold(lock, mon, r, c, volcano_str) @@ -136,24 +152,25 @@ def volcano(lock, mon, done_event): addstr(lock, mon, int(digit_r), int(digit_c), digit) time.sleep(0.2) + def main_thread(lock, mon, done_event): - v0=2 + v0 = 2 addstr(lock, mon, v0, 11, "LAVA: Large-scale Automated Vulnerability Addition", curses.A_BOLD) - addstr(lock, mon, v0+1, 17, "target: %s" % target_name) + addstr(lock, mon, v0 + 1, 17, "target: %s" % target_name) - v1=5 + v1 = 5 # stage 1 -- instrument source wait_for_file(add_queries_log) # ok the add queries log file at least exists - addstr(lock, mon, v1+0, 15, "1. Instrument source w/") - addstr(lock, mon, v1+1, 15, " dynamic queries & make") + addstr(lock, mon, v1 + 0, 15, "1. 
Instrument source w/") + addstr(lock, mon, v1 + 1, 15, " dynamic queries & make") # get source lines of code sb.check_call(["tar", "-xf", project['tarfile'], '-C', '/tmp']) outp = sb.check_output(['sloccount', "/tmp/%s" % target_name]) for line in outp.split("\n"): foo = re.search("^ansic:\s+([0-9]+) ", line) if foo: - addstr(lock, mon, v0+1, 42, "sloc: " + foo.groups()[0]) + addstr(lock, mon, v0 + 1, 42, "sloc: " + foo.groups()[0]) time.sleep(0.1) @@ -173,8 +190,8 @@ def main_thread(lock, mon, done_event): natp = 0 for n in res: natp += int(n) - addstr(lock, mon, v1, 48, "taint queries: %d" % ntq) - addstr(lock, mon, v1+1, 48, " atp queries: %d" % natp) + addstr(lock, mon, v1, 48, "taint queries: %d" % ntq) + addstr(lock, mon, v1 + 1, 48, " atp queries: %d" % natp) time.sleep(0.1) @@ -186,16 +203,16 @@ def main_thread(lock, mon, done_event): tm = extract_float(pattern, make_log) - addstr(lock, mon, v1, 4, "%4.2fs" % (ti+tm)) + addstr(lock, mon, v1, 4, "%4.2fs" % (ti + tm)) -# addstr(lock, mon, 9, 4, "%4.2fs" % tm) + # addstr(lock, mon, 9, 4, "%4.2fs" % tm) time.sleep(0.1) # stage 2 -- run instr program & record - v2=8 + v2 = 8 wait_for_file(bug_mining_log) - addstr(lock, mon, v2, 15, "2. Record run of") - addstr(lock, mon, v2+1, 15, " instrumented program") + addstr(lock, mon, v2, 15, "2. Record run of") + addstr(lock, mon, v2 + 1, 15, " instrumented program") pattern = "panda record complete ([0-9\.]+) seconds" wait_for(pattern, bug_mining_log) tr = extract_float(pattern, bug_mining_log) @@ -204,9 +221,9 @@ def main_thread(lock, mon, done_event): # stage 3 -- replay + taint v3 = 11 pattern = "Starting first and only replay" - wait_for(pattern,bug_mining_log) + wait_for(pattern, bug_mining_log) addstr(lock, mon, v3, 15, "3. 
Replay with taint") - addstr(lock, mon, v3+1, 15, " propagation") + addstr(lock, mon, v3 + 1, 15, " propagation") done = False while not done: @@ -216,11 +233,11 @@ def main_thread(lock, mon, done_event): pattern = "([0-9\.]+)\%\) instr" if (check_for(pattern, bug_mining_log)): perc = extract_float(pattern, bug_mining_log) - addstr(lock, mon, v3+1, 35, " %4.2f%%" % perc) + addstr(lock, mon, v3 + 1, 35, " %4.2f%%" % perc) time.sleep(0.11) - addstr(lock, mon, v3+1, 35, " 100.00%") + addstr(lock, mon, v3 + 1, 35, " 100.00%") time.sleep(0.11) - addstr(lock, mon, v3+1, 35, " ") + addstr(lock, mon, v3 + 1, 35, " ") # interestiing stats pattern = ":\s*([0-9]+) instrs total" @@ -236,16 +253,16 @@ def main_thread(lock, mon, done_event): # figure out how big plog is assert os.path.isfile(plog) plogsize = os.stat(plog).st_size - addstr(lock, mon, v3+1, 48, " plog: %d" % plogsize) + addstr(lock, mon, v3 + 1, 48, " plog: %d" % plogsize) time.sleep(0.11) # stage 4 -- fbi v4 = 16 - addstr(lock, mon, v4, 15, "4. Analyze taint & find") - addstr(lock, mon, v4+1, 15, " bug inject sites") + addstr(lock, mon, v4, 15, "4. 
Analyze taint & find") + addstr(lock, mon, v4 + 1, 15, " bug inject sites") # poll db to find out how many dua and atp we have -# first_db = True + # first_db = True last_num_dua = 0 last_num_atp = 0 last_num_bug = 0 @@ -257,15 +274,15 @@ def main_thread(lock, mon, done_event): num_dua = db.session.query(Dua).count() num_atp = db.session.query(AttackPoint).count() num_bug = db.session.query(Bug).count() -# if first_db and (num_dua > 0 or num_atp > 0 or num_bug > 0): -# addstr(lock, mon, v4, 48, "Database") -# first_db = False + # if first_db and (num_dua > 0 or num_atp > 0 or num_bug > 0): + # addstr(lock, mon, v4, 48, "Database") + # first_db = False if num_dua != last_num_dua: addstr(lock, mon, v4, 48, " DUAs: %d" % num_dua) if num_atp != last_num_atp: - addstr(lock, mon, v4+1, 48, " ATPs: %d" % num_atp) + addstr(lock, mon, v4 + 1, 48, " ATPs: %d" % num_atp) if num_bug != last_num_bug: - addstr(lock, mon, v4+2, 48, "pBUGs: %d" % num_bug) + addstr(lock, mon, v4 + 2, 48, "pBUGs: %d" % num_bug) last_num_dua = num_dua last_num_atp = num_atp last_num_bug = num_bug @@ -275,16 +292,16 @@ def main_thread(lock, mon, done_event): addstr(lock, mon, v4, 4, "%4.2fs" % tf) # stage 5 inj - v5=20 + v5 = 20 for trial in range(1, 2): # inject trial $trial lf = join(log_dir, "inject-{}.log".format(trial)) logp(str(trial)) wait_for_file(lf) if trial == 1: - addstr(lock, mon, v5, 15, "5. Inject bugs &") - addstr(lock, mon, v5+1, 15, " validate") - vt=v5+2+trial + addstr(lock, mon, v5, 15, "5. 
Inject bugs &") + addstr(lock, mon, v5 + 1, 15, " validate") + vt = v5 + 2 + trial addstr(lock, mon, vt, 15, " trial %d (100 bugs):" % trial) logp("select") @@ -330,17 +347,17 @@ def main_thread(lock, mon, done_event): src_dir = join(project_dir, 'bugs', '0', target_name) install_dir = join(src_dir, 'lava-install') for bug in last_build.bugs: - if db.session.query(Run)\ - .filter(Run.fuzzed == bug)\ - .filter(Run.build == last_build)\ - .filter(Run.exitcode.in_([134, 139, -6, -11]))\ + if db.session.query(Run) \ + .filter(Run.fuzzed == bug) \ + .filter(Run.build == last_build) \ + .filter(Run.exitcode.in_([134, 139, -6, -11])) \ .count() > 0: unfuzzed_input = join(project_dir, 'inputs', basename(project['inputs'][0])) suff = get_suffix(unfuzzed_input) pref = unfuzzed_input[:-len(suff)] if suff != "" else unfuzzed_input fuzzed_input = "{}-fuzzed-{}{}".format(pref, bug.id, suff) cmd = project['command'].format(input_file=fuzzed_input, install_dir=install_dir) - script = "echo RUNNING COMMAND for bug {}:; echo; echo FUZZED INPUT {}; echo; echo -n 'md5sum '; md5sum {}; echo; echo {}; echo; echo; LD_LIBRARY_PATH={} {}; /bin/sleep 1000"\ + script = "echo RUNNING COMMAND for bug {}:; echo; echo FUZZED INPUT {}; echo; echo -n 'md5sum '; md5sum {}; echo; echo {}; echo; echo; LD_LIBRARY_PATH={} {}; /bin/sleep 1000" \ .format(bug.id, fuzzed_input, fuzzed_input, cmd, join(install_dir, 'lib'), cmd) terminals.append(sb.Popen( ['gnome-terminal', '--geometry=60x24', '-x', 'bash', '-c', script] @@ -348,21 +365,25 @@ def main_thread(lock, mon, done_event): try: while True: pass - except KeyboardInterrupt: pass + except KeyboardInterrupt: + pass done_event.set() - try: sb.check_call(['killall', 'sleep']) - except sb.CalledProcessError: pass + try: + sb.check_call(['killall', 'sleep']) + except sb.CalledProcessError: + pass + def monitor_lava(stdscr): curses.curs_set(0) assert curses.has_colors() mon = curses.newwin(30, 80, 4, 4) - mon.hline(0,1,'-',78) - mon.hline(29,1,'-',78) - 
mon.vline(1,0,'|',28) - mon.vline(1,79,'|',28) + mon.hline(0, 1, '-', 78) + mon.hline(29, 1, '-', 78) + mon.vline(1, 0, '|', 28) + mon.vline(1, 79, '|', 28) lock = Lock() done_event = Event() @@ -378,4 +399,5 @@ def monitor_lava(stdscr): volcano_thread.join(1) + curses.wrapper(monitor_lava) diff --git a/scripts/docker-shell.sh b/scripts/docker-shell.sh index ffef8cab..91ba3c12 100755 --- a/scripts/docker-shell.sh +++ b/scripts/docker-shell.sh @@ -1,17 +1,22 @@ #!/bin/bash -# Single argument of project name will get container name +# Single argument of project name will get Docker name # from project config. Then 2nd optional argument is command to run # With no arguments, just give us a shell lava="$(dirname $(dirname $(readlink -f $0)))" -if [ "$#" -eq 0 ]; then - container="lava32" -else +# This project_name is a dummy value, we just want shell access +project_name="toy" +. `dirname $0`/vars.sh + +echo "You are connecting to the Docker container: ${dockername}" + +if [ "$#" -ne 0 ]; then project_name=$1 + echo "using project ${project_name}" cmd="${@:2}" -#Container name (lava32 or lava32debug) comes from config + # Docker name (lava32 or lava32debug) comes from config . `dirname $0`/vars.sh docker_map_args="-v $tarfiledir:$tarfiledir" @@ -19,13 +24,14 @@ else docker_map_args="$docker_map_args -v $directory:$directory" fi - if ! ( docker images ${container} | grep -q ${container} ); then - docker build -t ${container} "$(dirname $(dirname $(readlink -f $0)))/docker/debug" + if ! 
( docker images ${dockername} | grep -q ${dockername} ); then + docker build -t ${dockername} "$(dirname $(dirname $(readlink -f $0)))/docker/debug" fi - - [ "$extradockerargs" = "null" ] && extradockerargs=""; +else + echo "No extra args" fi +[ "$extradockerargs" = "null" ] && extradockerargs=""; whoami="$(whoami)" path="" cmd="sudo -u $whoami bash -c -- \"$cmd\"" @@ -57,4 +63,4 @@ docker run --rm -it \ --cap-add=SYS_PTRACE \ $docker_map_args \ $extradockerargs \ - ${container} sh -c "trap '' PIPE; $cmd" + ${dockername} sh -c "trap '' PIPE; $cmd" diff --git a/scripts/dua_atp_analysis_gdb.py b/scripts/dua_atp_analysis_gdb.py index ac558d98..a8ad2429 100644 --- a/scripts/dua_atp_analysis_gdb.py +++ b/scripts/dua_atp_analysis_gdb.py @@ -10,13 +10,13 @@ try: import gdb except: - print "Either your gdb is not > gdb 7" - print "Or you are trying to run this without gdb" - print "Exiting . . ." + print("Either your gdb is not > gdb 7") + print("Or you are trying to run this without gdb") + print("Exiting . . .") sys.exit(1) if not ("DUA" in os.environ and "ATP" in os.environ): - print "Must define DUA and ATP breakpoint locations. Exiting . . ." + print("Must define DUA and ATP breakpoint locations. Exiting . . .") sys.exit(1) # bp_num is int @@ -79,31 +79,31 @@ def handle_bp_event (): b = event.breakpoints[0] if b.number == 1: # we are at the dua - print "== HIT DUA, ENABLING ATP == . . ." + print("== HIT DUA, ENABLING ATP == . . .") gdb.execute("disable 1") gdb.execute("enable 2") elif b.number == 2: # we are at the attack point - print "== HIT DUA-ATP SEQUENCE, SUCCESS! ==" + print("== HIT DUA-ATP SEQUENCE, SUCCESS! 
==") gdb.execute("disable 2") elif b.location == EXIT_LOC: - print "At program exit normal with status:" + print("At program exit normal with status:") # status will usually be in eax variable for 32 bit systems # or maybe it's in $esp + 4 gdb.execute("p $eax") gdb.execute("x/xw $esp+4") - print "DUA HITS: {}".format(get_bp_hits(1)) - print "ATP HITS: {}".format(get_bp_hits(2)) + print("DUA HITS: {}".format(get_bp_hits(1))) + print("ATP HITS: {}".format(get_bp_hits(2))) gdb.execute("q") def handle_sig_event (): if -11 == event.stop_signal: - print "Found a seg fault" - print "DUA HITS: {}".format(get_bp_hits(1)) - print "ATP HITS: {}".format(get_bp_hits(2)) + print("Found a seg fault") + print("DUA HITS: {}".format(get_bp_hits(1))) + print("ATP HITS: {}".format(get_bp_hits(2))) gdb.execute("q") else: - print "Reached unhandled signal event: {}".format(event.stop_signal) + print("Reached unhandled signal event: {}".format(event.stop_signal)) # print "event handler type: stop with signal{}".format(event.stop_signal) # print event.breakpoints #launch_debug_using_ipython() diff --git a/scripts/fninstr.py b/scripts/fninstr.py index 6cf73b07..7746e752 100644 --- a/scripts/fninstr.py +++ b/scripts/fninstr.py @@ -1,8 +1,6 @@ -import sys -import yaml -import re -import cPickle as pickle import argparse +import pickle +import yaml debug = True @@ -13,18 +11,20 @@ # TODO: parameterize this IGNORE_FN_PTRS = False - -parser = argparse.ArgumentParser(description='Use output of LavaFnTool to figure out which parts of preproc code to instrument') +parser = argparse.ArgumentParser( + description='Use output of LavaFnTool to figure out which parts of preproc code to instrument') # TODO use vars.py to figure this out instead of arguments parser.add_argument('-d', '--dataflow', action="store_true", default=False, help="lava is using dataflow") +parser.add_argument('-r', '--read', action="store_true", default=True, + help="Read the LAVA output YAML file") parser.add_argument('-i', 
'--input', action="store", default=None, help="name of input yaml file from LavaFnTool") parser.add_argument('-o', '--output', action="store", default=None, help="name of output yaml file containing instrumentation decisions") -(args,rest) = parser.parse_known_args() +(args, rest) = parser.parse_known_args() data_flow = args.dataflow @@ -36,7 +36,7 @@ def parse_fundecl(fd): ext = fd['extern'] else: ext = None - return (ext, ret_type, params) + return ext, ret_type, params def check_start_end(x): @@ -45,7 +45,7 @@ def check_start_end(x): f1 = start.split(":")[0] f2 = end.split(":")[0] assert (f1 == f2) - return (f1, start, end, start==end) + return f1, start, end, start == end class Function: @@ -64,7 +64,7 @@ class FnPtrAssign: def __init__(self, fpa): (self.filename, self.start, self.end, see) = check_start_end(fpa) (self.extern, self.ret_type, self.params) = parse_fundecl(fpa['fundecl']) - # this is the value being assigned to the fn ptr, i.e. the RHS + # this is the value being assigned to the fn ptr, i.e., the RHS self.name = fpa['name'] assert (not see) @@ -88,7 +88,8 @@ def __init__(self, call): calls = {} fpas = {} -def addtohl(h,k,v): + +def addtohl(h, k, v): if not (k in h): h[k] = [] h[k].append(v) @@ -102,13 +103,15 @@ def merge(v, vors): assert (v is None) return vors + v -if True: + +if args.read: for filename in rest: - print "FILE [%s] " % filename - y = yaml.load(open(filename)) - assert(y is not None), "Missing output file from fninstr" + print("FILE [%s] " % filename) + with open(filename, 'r') as file: + y = yaml.safe_load(file) + assert (y is not None), "Missing output file from fninstr" for x in y: - # print x + # print x if 'fun' in x: fd = Function(x['fun']) if fd.start == fd.end: @@ -124,19 +127,18 @@ def merge(v, vors): fpa = FnPtrAssign(x['fnPtrAssign']) addtohl(fpas, fpa.name, fpa) - f = open("getfns.pickle", "w") - pickle.dump(fundefs, f) - pickle.dump(prots, f) - pickle.dump(calls, f) - pickle.dump(fpas, f) - f.close() + with 
open("getfns.pickle", "wb") as f: + pickle.dump(fundefs, f) + pickle.dump(prots, f) + pickle.dump(calls, f) + pickle.dump(fpas, f) + else: - f = open("getfns.pickle", "r") - fundefs = pickle.load(f) - prots = pickle.load(f) - calls = pickle.load(f) - fpas = pickle.load(f) - f.close() + with open("getfns.pickle", "rb") as f: + fundefs = pickle.load(f) + prots = pickle.load(f) + calls = pickle.load(f) + fpas = pickle.load(f) """ @@ -155,13 +157,13 @@ def merge(v, vors): fns_passed_as_args = {} for name in prots.keys(): all_fns.add(name) -print "%d fn names in prots" % (len(all_fns)) +print("%d fn names in prots" % (len(all_fns))) for name in fundefs.keys(): all_fns.add(name) -print "%d fn names in prots+fundefs" % (len(all_fns)) +print("%d fn names in prots+fundefs" % (len(all_fns))) for name in calls.keys(): all_fns.add(name) -print "%d fn names in prots+fundefs+calls" % (len(all_fns)) +print("%d fn names in prots+fundefs+calls" % (len(all_fns))) for name in calls.keys(): # examine args in each of the calls # to see if any are passing a function or fn pointer @@ -174,8 +176,7 @@ def merge(v, vors): all_fns.add(arg['name']) addtohl(fns_passed_as_args, arg['name'], call.name) -print "%d fn names in prots+fundefs+calls+callargs" % (len(all_fns)) - +print("%d fn names in prots+fundefs+calls+callargs" % (len(all_fns))) """ @@ -226,15 +227,14 @@ def merge(v, vors): assert fd.hasbody instr_judgement[name] = OKI if debug: - print "Instr candidate %s has body" % name + print("Instr candidate %s has body" % name) break else: # we have no fundec for this fn, thus definitely no body. 
# so don't instrument instr_judgement[name] = DIB | DADFA if debug: - print "Won't instrument %s (data_flow) since we don't have body" % name - + print("Won't instrument %s (data_flow) since we don't have body" % name) instr = set() for name in instr_judgement.keys(): @@ -258,7 +258,6 @@ def merge(v, vors): if not (disposition is OKI): instr_judgement[name] = disposition - """ Make another pass to see if there are any fns assigned to fnptrs If so, (for now) we won't inject in them since we can't control the @@ -266,7 +265,7 @@ def merge(v, vors): """ if IGNORE_FN_PTRS: for name in fpas: - instr_judgement[name] |= DADFA | DIB + instr_judgement[name] |= DADFA | DIB # Ok we have a list of instrumentable functions. # Now, we need to transitively close. @@ -276,20 +275,19 @@ def merge(v, vors): while any_change: any_change = False for called_fn_name in calls.keys(): - if (instr_judgement[called_fn_name] is OKI): + if instr_judgement[called_fn_name] is OKI: # We 'think' we can instrument called_fn_name for call in calls[called_fn_name]: - if (not (instr_judgement[call.containing_function] is OKI)): + if not (instr_judgement[call.containing_function] is OKI): # ... however, it is called from a function that cant be instrumented # thus it cant really be instrumented. any_change = True - print "Cant instrument %s because its called from %s which we can't instrument" % (called_fn_name, call.containing_function) + print("Cant instrument %s because its called from %s which we can't instrument" % ( + called_fn_name, call.containing_function)) instr_judgement[called_fn_name] = DIB | DADFA break if any_change: - print "instr_judgement changed. Iterating." - - + print("instr_judgement changed. 
Iterating.") ninstr = {} for name in instr: @@ -298,24 +296,15 @@ def merge(v, vors): ninstr[disp] = 0 ninstr[disp] += 1 - for i in range(4): if i in ninstr: - print "instrflags=%d: count=%d" % (i, ninstr[i]) - -for name in instr_judgement.keys(): - if instr_judgement[name] == OKI: - print "Intrumenting fun [%s]" % name - + print("instrflags=%d: count=%d" % (i, ninstr[i])) - - -f = open(args.output, "w") for name in instr_judgement.keys(): if instr_judgement[name] == OKI: - f.write("NOFILENAME %s\n" % name) -f.close() - - - + print("Intrumenting fun [%s]" % name) +with open(args.output, "w") as f: + for name in instr_judgement.keys(): + if instr_judgement[name] == OKI: + f.write("NOFILENAME %s\n" % name) diff --git a/scripts/funcs.sh b/scripts/funcs.sh index 9ed591bd..24693590 100755 --- a/scripts/funcs.sh +++ b/scripts/funcs.sh @@ -69,6 +69,7 @@ if [ -z "$LAVA_FUNCS_INCLUDED" ]; then bash -c "$command" >> "$logfile" 2>&1 elif [ "$remote_machine" == "docker" ]; then echo docker run $dockername sh -c "$command" + DOCKER_IP=$(ifconfig docker0 | grep 'inet ' | awk '{print $2}') docker run --rm -it \ -e "HTTP_PROXY=$HTTP_PROXY" \ -e "HTTPS_PROXY=$HTTPS_PROXY" \ @@ -81,7 +82,8 @@ if [ -z "$LAVA_FUNCS_INCLUDED" ]; then -v /etc/shadow:/etc/shadow:ro \ -v /etc/gshadow:/etc/gshadow:ro \ -v /home:/home:ro \ - --add-host=database:172.17.0.1 \ + -v $HOME/.panda:$HOME/.panda \ + --add-host=database:$DOCKER_IP \ $docker_map_args \ $extradockerargs \ $dockername sh -c "trap '' PIPE; su -l $(whoami) -c \"$command\"" \ diff --git a/scripts/get-clang.sh b/scripts/get-clang.sh deleted file mode 100755 index 3b980a5a..00000000 --- a/scripts/get-clang.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/bash - -#release="3.8.0" -release="3.6.2" -llvm_version="llvm-$release" - -wget http://llvm.org/releases/$release/$llvm_version.src.tar.xz -tar -xJf $llvm_version.src.tar.xz - -mv $llvm_version.src $llvm_version -cd $llvm_version - -pushd tools - -clang_version="cfe-$release" -wget 
http://llvm.org/releases/$release/$clang_version.src.tar.xz -tar -xJf $clang_version.src.tar.xz -mv $clang_version.src clang - -pushd clang/tools -wget http://llvm.org/releases/$release/clang-tools-extra-$release.src.tar.xz -tar -xJf clang-tools-extra-$release.src.tar.xz -mv clang-tools-extra-$release.src extra -popd - -popd - -./configure --enable-optimized --disable-assertions --enable-targets=x86,arm --enable-shared --enable-pic --host=$(gcc -dumpmachine) --build=$(gcc -dumpmachine) -REQUIRES_RTTI=1 make -j $(nproc) diff --git a/scripts/inject.py b/scripts/inject.py index a32384c0..1dbe7eb3 100755 --- a/scripts/inject.py +++ b/scripts/inject.py @@ -1,8 +1,7 @@ -#!/usr/bin/python +#!/usr/bin/env python3 import argparse import atexit -import json import lockfile import os import signal @@ -13,19 +12,20 @@ from vars import parse_vars from lava import LavaDatabase, Run, Bug, \ - inject_bugs, LavaPaths, validate_bugs, \ - get_bugs, run_cmd, get_allowed_bugtype_num + inject_bugs, LavaPaths, validate_bugs, \ + get_bugs, run_cmd, get_allowed_bugtype_num start_time = time.time() debugging = False -version="2.0.0" +version = "2.0.0" + # get list of bugs either from cmd line or db def get_bug_list(args, db, allowed_bugtypes): update_db = False - print "Picking bugs to inject." 
+ print("Picking bugs to inject.") sys.stdout.flush() bug_list = [] @@ -33,20 +33,20 @@ def get_bug_list(args, db, allowed_bugtypes): bug_id = int(args.bugid) bug_list.append(bug_id) elif args.randomize: - print "Remaining to inj:", db.uninjected().count() - print "Using strategy: random" + print("Remaining to inj:", db.uninjected().count()) + print("Using strategy: random") bug = db.next_bug_random(False) bug_list.append(bug.id) update_db = True elif args.buglist: - bug_list = eval(args.buglist) # TODO + bug_list = eval(args.buglist) # TODO update_db = False elif args.many: num_bugs_to_inject = int(args.many) huge = db.huge() - available = "tons" if huge else db.uninjected().count() # Only count if not huge - print "Selecting %d bugs for injection of %s available" % (num_bugs_to_inject, str(available)) + available = "tons" if huge else db.uninjected().count() # Only count if not huge + print("Selecting %d bugs for injection of %s available" % (num_bugs_to_inject, str(available))) if not huge: assert available >= num_bugs_to_inject @@ -57,7 +57,7 @@ def get_bug_list(args, db, allowed_bugtypes): bugs_to_inject = db.uninjected_random_limit(allowed_bugtypes=allowed_bugtypes, count=num_bugs_to_inject) bug_list = [b.id for b in bugs_to_inject] - print "%d is size of bug_list" % (len(bug_list)) + print("%d is size of bug_list" % (len(bug_list))) update_db = True else: assert False @@ -72,7 +72,7 @@ def get_bugs_parent(lp): bugs_parent = "" candidate = 0 bugs_lock = None - print "Getting locked bugs directory..." 
+ print("Getting locked bugs directory...") sys.stdout.flush() while bugs_parent == "": @@ -94,46 +94,46 @@ def get_bugs_parent(lp): for sig in [signal.SIGINT, signal.SIGTERM]: signal.signal(sig, lambda s, f: sys.exit(0)) - print "Using dir", bugs_parent + print("Using dir", bugs_parent) lp.set_bugs_parent(bugs_parent) return bugs_parent + if __name__ == "__main__": update_db = False parser = argparse.ArgumentParser(description='Inject and test LAVA bugs.') - parser.add_argument('host_json', help = 'Host JSON file') - parser.add_argument('project', help = 'Project name') + parser.add_argument('host_json', help='Host JSON file') + parser.add_argument('project', help='Project name') parser.add_argument('-b', '--bugid', action="store", default=-1, - help = 'Bug id (otherwise, highest scored will be chosen)') - parser.add_argument('-r', '--randomize', action='store_true', default = False, - help = 'Choose the next bug randomly rather than by score') + help='Bug id (otherwise, highest scored will be chosen)') + parser.add_argument('-r', '--randomize', action='store_true', default=False, + help='Choose the next bug randomly rather than by score') parser.add_argument('-m', '--many', action="store", default=-1, - help = 'Inject this many bugs (chosen randomly)') + help='Inject this many bugs (chosen randomly)') parser.add_argument('-l', '--buglist', action="store", default=False, - help = 'Inject this list of bugs') + help='Inject this list of bugs') parser.add_argument('-k', '--knobTrigger', metavar='int', type=int, action="store", default=0, - help = 'specify a knob trigger style bug, eg -k [sizeof knob offset]') + help='specify a knob trigger style bug, eg -k [sizeof knob offset]') parser.add_argument('-s', '--skipInject', action="store", default=False, - help = 'skip the inject phase and just run the bugged binary on fuzzed inputs') + help='skip the inject phase and just run the bugged binary on fuzzed inputs') parser.add_argument('-nl', '--noLock', 
action="store_true", default=False, - help = ('No need to take lock on bugs dir')) + help='No need to take lock on bugs dir') parser.add_argument('-c', '--checkStacktrace', action="store_true", default=False, - help = ('When validating a bug, make sure it manifests at same line as lava-inserted trigger')) + help='When validating a bug, make sure it manifests at same line as lava-inserted trigger') parser.add_argument('-e', '--exitCode', action="store", default=0, type=int, - help = ('Expected exit code when program exits without crashing. Default 0')) + help='Expected exit code when program exits without crashing. Default 0') parser.add_argument('-bb', '--balancebugtype', action="store_true", default=False, - help = ('Attempt to balance bug types, i.e. inject as many of each type')) + help='Attempt to balance bug types, i.e. inject as many of each type') parser.add_argument('-competition', '--competition', action="store_true", default=False, - help = ('Inject in competition mode where logging will be added in #IFDEFs')) + help='Inject in competition mode where logging will be added in #IFDEFs') parser.add_argument("-fixups", "--fixupsscript", action="store", default=False, - help = ("script to run after injecting bugs into source to fixup before make")) -# parser.add_argument('-wl', '--whitelist', action="store", default=None, -# help = ('White list file of functions to bug and data flow')) + help="script to run after injecting bugs into source to fixup before make") + # parser.add_argument('-wl', '--whitelist', action="store", default=None, + # help = ('White list file of functions to bug and data flow')) parser.add_argument('-t', '--bugtypes', action="store", default="ptr_add,rel_write", - help = ('bug types to inject')) + help='bug types to inject') parser.add_argument('--version', action="version", version="%(prog)s {}".format(version)) - args = parser.parse_args() global project project = parse_vars(args.host_json, args.project) @@ -141,7 +141,7 @@ def 
get_bugs_parent(lp): allowed_bugtypes = get_allowed_bugtype_num(args) - print "allowed bug types: " + (str(allowed_bugtypes)) + print("allowed bug types: " + (str(allowed_bugtypes))) # Set various paths lp = LavaPaths(project) @@ -150,7 +150,8 @@ def get_bugs_parent(lp): try: os.makedirs(lp.bugs_top_dir) - except Exception: pass + except Exception: + pass # this is where buggy source code will be bugs_parent = get_bugs_parent(lp) @@ -158,14 +159,14 @@ def get_bugs_parent(lp): # Remove all old YAML files run_cmd(["rm -f {}/*.yaml".format(lp.bugs_build)], None, 10, cwd="/", shell=True) - # obtain list of bugs to inject based on cmd-line args and consulting db (update_db, bug_list) = get_bug_list(args, db, allowed_bugtypes) # add all those bugs to the source code and check that it compiles - # TODO use bug_solutions and make inject_bugs return solutions for single-dua bugs? + # TODO use bug_solutions and make inject_bugs return solutions for single-dua bugs? (build, input_files, bug_solutions) = inject_bugs(bug_list, db, lp, args.host_json, - project, args, update_db, dataflow=dataflow, competition=args.competition) + project, args, update_db, dataflow=dataflow, + competition=args.competition) if build is None: raise RuntimeError("LavaTool failed to build target binary") @@ -188,19 +189,20 @@ def count_bug_types(id_list): print("%d c(%s)=%d" % (t, Bug.type_strings[t], tcount[t])) print(str(buglist[t])) - print "\nBug types in original, potential set" + + print("\nBug types in original, potential set") count_bug_types(bug_list) - print "\nBug types in validated set" + print("\nBug types in validated set") count_bug_types(real_bug_list) except Exception as e: - print "TESTING FAIL" + print("TESTING FAIL") if update_db: db.session.add(Run(build=build, fuzzed=None, exitcode=-22, output=str(e), success=False, validated=False)) db.session.commit() raise - print "inject complete %.2f seconds" % (time.time() - start_time) + print("inject complete %.2f seconds" % (time.time() 
- start_time)) diff --git a/scripts/inject.sh b/scripts/inject.sh index ae50e1e9..df48c2a1 100755 --- a/scripts/inject.sh +++ b/scripts/inject.sh @@ -4,7 +4,7 @@ # Json file required params # # lava: directory of lava repository -# pandahost: what remote host to run panda on + trap '' PIPE set -e # Exit on error @@ -76,7 +76,7 @@ mkdir -p $logs lf="$logs/inject.log" progress "inject" 1 "Starting -- logging to $lf" truncate "$lf" -run_remote "$testinghost" "$python $scripts/inject.py -m $num_bugs $bug_list -e $exit_code $dataflow $json" "$lf" +run_remote "$buildhost" "$python $scripts/inject.py -m $num_bugs $bug_list -e $exit_code $dataflow $json" "$lf" grep yield "$lf" progress "inject" 1 "Finished." diff --git a/scripts/kw-analysis.py b/scripts/kw-analysis.py index d0078c9f..e3230128 100644 --- a/scripts/kw-analysis.py +++ b/scripts/kw-analysis.py @@ -1,19 +1,20 @@ import os import re import glob -import subprocess32 +import subprocess # analyze klocwork results # to determine if it is ever actually finding any of the LAVA bugs debug = False + def run_cmd(args, cw_dir): if debug: if not (cw_dir is None): - print "cwd " + (str(cw_dir)) - print "run_cmd " + (str(args)) - p = subprocess32.Popen(args, cwd=cw_dir, stdout=subprocess32.PIPE, stderr=subprocess32.PIPE) + print("cwd " + (str(cw_dir))) + print("run_cmd " + (str(args))) + p = subprocess.Popen(args, cwd=cw_dir, stdout=subprocess.PIPE, stderr=subprocess.PIPE) return p.communicate() @@ -33,10 +34,10 @@ def run_cmd(args, cw_dir): if foo: bugs.append(foo.groups()[0]) -print "found %d bugs in repo" % (len(bugs)) +print("found %d bugs in repo" % (len(bugs))) for bug in bugs: - print "bug %s: " % bug, + print("bug %s: " % bug) run_cmd(['git', 'checkout', bug], gitdir) # check out that bug and find file / line for lava_get srcfiles = glob.glob("%s/src/*.[ch]" % gitdir) @@ -44,7 +45,7 @@ def run_cmd(args, cw_dir): (output, bar) = run_cmd(['grep', '-n', 'lava_get()', srcfile], None) if output: line = 
int(output.split(':')[0]) - print "truth: %d: %s" % (line, srcfile) + print("truth: %d: %s" % (line, srcfile)) (p,fn) = os.path.split(srcfile) (out2, bar) = run_cmd(['grep', fn, "%s/bug-%s-kw.out" % (kwd, bug)], kwd) # print out2 @@ -52,7 +53,7 @@ def run_cmd(args, cw_dir): for o in out2.split("\n"): foo2 = re.search("^[0-9]+ \(Local\) [^:]+:([0-9]+) (.*)$", o) if foo2: - print " kw: " + o + print(" kw: " + o) kw_line = int(foo2.groups()[0]) kw_reason = foo2.groups()[1] # print "kwres: %d: %s" % (kw_line, kw_reason) @@ -60,9 +61,6 @@ def run_cmd(args, cw_dir): correct = True break if correct: - print "SUCCESS" + print("SUCCESS") else: - print "FAILURE" - - - + print("FAILURE") diff --git a/scripts/lava.py b/scripts/lava.py index 0c5509c6..44cf6dbd 100644 --- a/scripts/lava.py +++ b/scripts/lava.py @@ -1,54 +1,46 @@ -from __future__ import print_function - -import os -import re -import sys import math +import os +import random import shlex import struct -import random -import subprocess32 - -from os.path import join -from os.path import dirname +import subprocess +import sys from os.path import abspath from os.path import basename +from os.path import dirname +from os.path import join +from subprocess import PIPE +from subprocess import check_call -from sqlalchemy import Table from sqlalchemy import Column from sqlalchemy import ForeignKey +from sqlalchemy import Table from sqlalchemy import create_engine -from sqlalchemy.types import Text -from sqlalchemy.types import Float -from sqlalchemy.types import Integer -from sqlalchemy.types import Boolean -from sqlalchemy.types import BigInteger +from sqlalchemy.dialects import postgresql +from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.orm import load_only from sqlalchemy.orm import relationship from sqlalchemy.orm import sessionmaker -from sqlalchemy.dialects import postgresql from sqlalchemy.sql.expression import func -from sqlalchemy.ext.declarative import declarative_base - -# from 
multiprocessing import cpu_count -# from multiprocessing.pool import ThreadPool - -from time import sleep - -from subprocess32 import PIPE -from subprocess32 import check_call +from sqlalchemy.types import BigInteger +from sqlalchemy.types import Boolean +from sqlalchemy.types import Float +from sqlalchemy.types import Integer +from sqlalchemy.types import Text from composite import Composite - -from test_crash import process_crash - from process_compile_commands import get_c_files from process_compile_commands import process_compile_commands +from test_crash import process_crash + +# from multiprocessing import cpu_count +# from multiprocessing.pool import ThreadPool Base = declarative_base() debugging = False -NUM_BUGTYPES = 3 # Make sure this matches what's in lavaTool +NUM_BUGTYPES = 3 # Make sure this matches what's in lavaTool + class Loc(Composite): column = Integer @@ -118,7 +110,7 @@ def __str__(self): input={}, instr={}, fake_dua={})'.format( self.id, self.lval, self.all_labels, self.viable_bytes, self.inputfile, self.instr, self.fake_dua - ) + ) class DuaBytes(Base): @@ -151,6 +143,7 @@ class AttackPoint(Base): POINTER_WRITE = 2 QUERY_POINT = 3 PRINTF_LEAK = 4 + # } type; def __str__(self): @@ -242,7 +235,7 @@ class LavaDatabase(object): def __init__(self, project): self.project = project self.engine = create_engine( - "postgresql+psycopg2://{}@database:5432/{}".format( + "postgresql+psycopg2://{}@database:5432/{}".format( "postgres", project['db'] ) ) @@ -258,10 +251,10 @@ def uninjected(self): # returns uninjected (not yet in the build table) possibly fake bugs def uninjected2(self, fake, allowed_bugtypes=None): - ret = self.uninjected()\ - .join(Bug.atp)\ - .join(Bug.trigger)\ - .join(DuaBytes.dua)\ + ret = self.uninjected() \ + .join(Bug.atp) \ + .join(Bug.trigger) \ + .join(DuaBytes.dua) \ .filter(Dua.fake_dua == fake) if allowed_bugtypes: ret.filter(Bug.type.in_(allowed_bugtypes)) @@ -282,21 +275,21 @@ def uninjected_random_by_atp_bugtype(self, 
fake, atp_types=None, allowed_bugtype _atps = self.session.query(AttackPoint.id).all() atps = [r.id for r in _atps] - #print(atps) + # print(atps) print("Found {} distinct ATPs".format(len(atps))) results = {} - assert(len(allowed_bugtypes)), "Requires bugtypes" + assert (len(allowed_bugtypes)), "Requires bugtypes" for bugtype in allowed_bugtypes: results[bugtype] = [] for atp in atps: - q = self.session.query(Bug).filter(Bug.atp_id==atp).filter(~Bug.builds.any()) \ - .filter(Bug.type == bugtype) \ - .join(Bug.atp)\ - .join(Bug.trigger)\ - .join(DuaBytes.dua)\ - .filter(Dua.fake_dua == fake) + q = self.session.query(Bug).filter(Bug.atp_id == atp).filter(~Bug.builds.any()) \ + .filter(Bug.type == bugtype) \ + .join(Bug.atp) \ + .join(Bug.trigger) \ + .join(DuaBytes.dua) \ + .filter(Dua.fake_dua == fake) results[bugtype].append(q.order_by(func.random()).limit(atp_lim).all()) return results @@ -333,7 +326,7 @@ def uninjected_random_by_atp(self, fake, atp_types=None, def uninjected_random_limit(self, allowed_bugtypes=None, count=100): # Fast, doesn't support fake bugs, only return IDs of allowed bugtypes - ret = self.session.query(Bug)\ + ret = self.session.query(Bug) \ .filter(~Bug.builds.any()) \ .options(load_only("id")) if allowed_bugtypes: @@ -344,9 +337,9 @@ def uninjected_random_y(self, fake, allowed_bugtypes=None, yield_count=100): # Same as above but yield results ret = self.session.query(Bug) \ .filter(~Bug.builds.any()).yield_per(yield_count) \ - .join(Bug.atp)\ - .join(Bug.trigger)\ - .join(DuaBytes.dua)\ + .join(Bug.atp) \ + .join(Bug.trigger) \ + .join(DuaBytes.dua) \ .filter(Dua.fake_dua == fake) if allowed_bugtypes: ret = ret.filter(Bug.type.in_(allowed_bugtypes)) @@ -354,15 +347,15 @@ def uninjected_random_y(self, fake, allowed_bugtypes=None, yield_count=100): def uninjected_random_balance(self, fake, num_required, bug_types): bugs = [] - types_present = self.session.query(Bug.type)\ - .filter(~Bug.builds.any())\ + types_present = 
self.session.query(Bug.type) \ + .filter(~Bug.builds.any()) \ .group_by(Bug.type) num_avail = 0 for (i,) in types_present: if i in bug_types: num_avail += 1 print("%d bugs available of allowed types" % num_avail) - assert(num_avail > 0) + assert (num_avail > 0) num_per = num_required / num_avail for (i,) in types_present: if (i in bug_types): @@ -377,34 +370,34 @@ def next_bug_random(self, fake): def run_cmd(cmd, envv=None, timeout=30, cwd=None, rr=False, shell=False): - if type(cmd) in [str, unicode] and not shell: + if type(cmd) in [str] and not shell: cmd = shlex.split(cmd) if debugging: env_string = "" if envv: env_string = " ".join(["{}='{}'".format(k, v) - for k, v in envv.iteritems()]) + for k, v in envv.items()]) if type(cmd) == list: print("run_cmd(" + env_string + " " + - subprocess32.list2cmdline(cmd) + ")") + subprocess.list2cmdline(cmd) + ")") else: print("run_cmd(" + env_string + " " + - cmd + ")") + cmd + ")") # Merge current environ with passed envv merged_env = os.environ.copy() if envv: for k, v in envv.items(): merged_env[k] = v - p = subprocess32.Popen(cmd, cwd=cwd, env=merged_env, stdout=PIPE, - stderr=PIPE, shell=shell) + p = subprocess.Popen(cmd, cwd=cwd, env=merged_env, stdout=PIPE, + stderr=PIPE, shell=shell) try: # returns tuple (stdout, stderr) output = p.communicate(timeout=timeout) if debugging: print("Run_cmd output: {}".format(repr(output[1]))) - except subprocess32.TimeoutExpired: + except subprocess.TimeoutExpired: print("Killing process due to timeout expiration.") p.terminate() return (-9, ("", "timeout expired")) @@ -415,6 +408,7 @@ def run_cmd(cmd, envv=None, timeout=30, cwd=None, rr=False, shell=False): def run_cmd_notimeout(cmd, **kwargs): return run_cmd(cmd, None, None, **kwargs) + # fuzz_labels_list is a list of listof tainted # byte offsets within file filename. 
# replace those bytes with random in a new @@ -426,7 +420,7 @@ def mutfile(filename, fuzz_labels_list, new_filename, bug, # Open filename, mutate it and store in new_filename such that # it hopefully triggers the passed bug if kt: - assert (knob < 2**16-1) + assert (knob < 2 ** 16 - 1) bug_trigger = bug.magic & 0xffff magic_val = struct.pack(" 0: - extra_query = db.session.query(DuaBytes)\ + extra_query = db.session.query(DuaBytes) \ .filter(DuaBytes.id.in_(bug.extra_duas)) fuzz_labels_list.extend([d.all_labels for d in extra_query]) mutfile(unfuzzed_input, fuzz_labels_list, fuzzed_input, bug, @@ -1093,17 +1078,17 @@ def validate_bug(db, lp, project, bug, bug_index, build, args, update_db, print("retval = %d" % rv) validated = False if bug.trigger.dua.fake_dua is False: - print ("bug type is " + Bug.type_strings[bug.type]) + print("bug type is " + Bug.type_strings[bug.type]) if bug.type == Bug.PRINTF_LEAK: if outp != unfuzzed_outputs[bug.trigger.dua.inputfile]: - print ("printf bug -- outputs disagree\n") + print("printf bug -- outputs disagree\n") validated = True else: # this really is supposed to be a bug # we should see a seg fault or something # NB: Wrapping programs in bash transforms rv -> 128 - rv, # so we do the mod - if (rv % 256) > 128 and rv != -9: # check and ignoring timeouts + if (rv % 256) > 128 and rv != -9: # check and ignoring timeouts print("RV indicates memory corruption") # Default: not checking that bug manifests at same line as # trigger point or is found by competition grading @@ -1147,7 +1132,6 @@ def validate_bug(db, lp, project, bug, bug_index, build, args, update_db, # validate this set of bugs def validate_bugs(bug_list, db, lp, project, input_files, build, args, update_db, competition=False, bug_solutions=None): - timeout = project.get('timeout', 5) print("------------\n") @@ -1187,7 +1171,7 @@ def validate_bugs(bug_list, db, lp, project, input_files, build, bugs_to_inject = db.session.query(Bug).filter(Bug.id.in_(bug_list)).all() for 
bug_index, bug in enumerate(bugs_to_inject): print("=" * 60) - print("Validating bug {} of {} ". format( + print("Validating bug {} of {} ".format( bug_index + 1, len(bugs_to_inject))) # We should always have solutions for multidua bugs @@ -1209,9 +1193,9 @@ def validate_bugs(bug_list, db, lp, project, input_files, build, print(u"yield {:.2f} ({} out of {}) real bugs (95% CI +/- {:.2f}) " .format(f, len(real_bugs), len(bugs_to_inject), 1.96 * math.sqrt(f * (1 - f) / len(bugs_to_inject)))) - print ("A BOUNTIFUL CROP OF BUGS: %s" % (",".join(map(str, real_bugs)))) + print("A BOUNTIFUL CROP OF BUGS: %s" % (",".join(map(str, real_bugs)))) else: - print ("yield to me") + print("yield to me") print("TESTING COMPLETE") if update_db: diff --git a/scripts/lava.sh b/scripts/lava.sh index 8ff12db0..35cdfc71 100755 --- a/scripts/lava.sh +++ b/scripts/lava.sh @@ -26,9 +26,7 @@ # directory: where you want source to build # name: a name for this project (used to create directories) # inputs: a list of inputs that will be used to find potential bugs (think coverage) -# buildhost: what remote host to build source on -# pandahost: what remote host to run panda and postgres on -# testinghost: what host to test injected bugs on +# buildhost: what remote host to build source and test injected bugs on # fixupscript: script to run after add_query to fix up src before make # @@ -105,7 +103,7 @@ fi if [[ $demo -eq 1 ]] then - gnome-terminal --geometry=90x40 -x bash -c "python $(dirname $0)/demo.py $json; read" & + gnome-terminal --geometry=90x40 -x bash -c "$python $(dirname $0)/demo.py $json; read" & fi progress "everything" 1 "JSON file is $json" @@ -132,9 +130,9 @@ RESET_DB() { lf="$logs/dbwipe.log" truncate "$lf" progress "everything" 1 "Resetting lava db -- logging to $lf" - run_remote "$buildhost" "dropdb -U postgres -h database $db || true" "$lf" - run_remote "$buildhost" "createdb -U postgres -h database $db || true" "$lf" - run_remote "$buildhost" "psql -d $db -h database -f 
$lava/tools/lavaODB/generated/lava.sql -U postgres" "$lf" + run_remote "$buildhost" "dropdb -U $pguser -h $dbhost $db || true" "$lf" + run_remote "$buildhost" "createdb -U $pguser -h $dbhost $db || true" "$lf" + run_remote "$buildhost" "psql -d $db -h $dbhost -f $lava/tools/lavaODB/generated/lava.sql -U $pguser" "$lf" run_remote "$buildhost" "echo dbwipe complete" "$lf" } @@ -146,6 +144,7 @@ if [ $reset -eq 1 ]; then deldir "$directory/$name/"'*rr-*' # remove all plog files in the directory deldir "$directory/$name/*.plog" + deldir "$directory/$name/*.json" progress "everything" 0 "Truncating logs..." for i in $(ls "$logs" | grep '.log$'); do truncate "$logs/$i" @@ -155,15 +154,12 @@ if [ $reset -eq 1 ]; then echo "reset complete $time_diff seconds" fi - - - if [ $add_queries -eq 1 ]; then tick progress "everything" 1 "Add queries step -- btrace lavatool and fixups" lf="$logs/add_queries.log" truncate "$lf" - progress "everything" 1 "Adding queries to source -- logging to $lf" + progress "everything" 1 "Adding queries to source with type $ATP and $project_name -- logging to $lf" run_remote "$buildhost" "$scripts/add_queries.sh $ATP_TYPE $project_name" "$lf" if [ "$fixupscript" != "null" ]; then lf="$logs/fixups.log" @@ -183,7 +179,8 @@ if [ $make -eq 1 ]; then progress "everything" 1 "Make step -- making 32-bit version with queries" lf="$logs/make.log" truncate "$lf" - run_remote "$buildhost" "cd $sourcedir && CC=/llvm-3.6.2/Release/bin/clang CXX=/llvm-3.6.2/Release/bin/clang++ CFLAGS='-O0 -m32 -DHAVE_CONFIG_H -g -gdwarf-2 -fno-stack-protector -D_FORTIFY_SOURCE=0 -I. -I.. -I../include -I./src/' $makecmd" "$lf" + # Note, adding the static flag is important. We are running the binaries on a PANDA VM, so we have no idea if it will have any libraries we need. + run_remote "$buildhost" "cd $sourcedir && CC=$llvm/bin/clang CXX=$llvm/bin/clang++ CFLAGS='-O0 -m32 -DHAVE_CONFIG_H -g -gdwarf-2 -fno-stack-protector -D_FORTIFY_SOURCE=0 -I. -I.. 
-I../include -I./src/ -static' $makecmd" "$lf" run_remote "$buildhost" "cd $sourcedir && rm -rf lava-install" "$lf" if [ "$install_simple" == "null" ]; then @@ -211,7 +208,7 @@ if [ $taint -eq 1 ]; then # If we didn't just reset the DB, we need clear out any existing taint labels before running FBI progress "everything" 1 "Clearing taint data from DB" lf="$logs/dbwipe_taint.log" - run_remote "$buildhost" "psql -U postgres -h database -c \"delete from dua_viable_bytes; delete from labelset;\" $db" "$lf" + run_remote "$buildhost" "psql -U $pguser -h $dbhost -c \"delete from dua_viable_bytes; delete from labelset;\" $db" "$lf" fi progress "everything" 1 "Taint step -- running panda and fbi" for input in $inputs @@ -220,16 +217,16 @@ if [ $taint -eq 1 ]; then lf="$logs/bug_mining-$i.log" truncate "$lf" progress "everything" 1 "PANDA taint analysis prospective bug mining -- input $input -- logging to $lf" - run_remote "$buildhost" "$python $scripts/bug_mining.py $hostjson $project_name $input $curtail" "$lf" + run_remote "$buildhost" "$python $scripts/bug_mining.py $hostjson $project_name $input $curtail" "$lf" echo -n "Num Bugs in db: " - bug_count=$(run_remote "$buildhost" "psql -At $db -U postgres -h database -c 'select count(*) from bug'") + bug_count=$(run_remote "$buildhost" "psql -At $db -U $pguser -h $dbhost -c 'select count(*) from bug'") if [ "$bug_count" = "0" ]; then echo "FATAL ERROR: no bugs found" exit 1 fi echo "Found $bug_count bugs" echo - run_remote "$buildhost" "psql $db -U postgres -h database -c 'select count(*), type from bug group by type order by type'" + run_remote "$buildhost" "psql $db -U $pguser -h $dbhost -c 'select count(*), type from bug group by type order by type'" done tock echo "bug_mining complete $time_diff seconds" @@ -249,7 +246,7 @@ if [ $inject -eq 1 ]; then if [ "$injfixupsscript" != "null" ]; then fix="--fixupsscript='$injfixupsscript'" fi - run_remote "$testinghost" "$python $scripts/inject.py -t $bugtypes -m $many -e 
$exitCode $kt $fix $hostjson $project_name" "$lf" + run_remote "$buildhost" "$python $scripts/inject.py -t $bugtypes -m $many -e $exitCode $kt $fix $hostjson $project_name" "$lf" grep yield "$lf" | grep " real bugs " done fi diff --git a/scripts/manual_lava.py b/scripts/manual_lava.py index 2a8f707d..0da93626 100644 --- a/scripts/manual_lava.py +++ b/scripts/manual_lava.py @@ -1,13 +1,7 @@ -import re import sys -import random import subprocess as sp import sys - -# ok this is gross -sys.path.append("/home/tleek/git/panda-leet/panda/scripts") - -from plog_reader import PLogReader +from pandare.plog_reader import PLogReader # # manual_lava.py @@ -67,20 +61,20 @@ max_tcn = int(sys.argv[2]) # this is where the *exact* binaries that were used to create the slash-sci replay are... -libxml = "install/libxml2/.libs/libxml2.so" +libxml = "install/libxml2/.libs/libxml2.so" xmllint = "install/libxml2/.libs/xmllint" # NB: You will need to fiddle with this script wherever those two variables are used # in order to specialize it to your program + libs. Sorry no time to make it great. -# first go thru plog to get a reasonable mapping +# first go through plog to get a reasonable mapping libs_for_thread = {} with PLogReader(plog) as plr: - for i,m in enumerate(plr): + for i, m in enumerate(plr): if m.HasField("asid_libraries"): al = m.asid_libraries - thread = m.asid # (al.tid, al.create_time) + thread = m.asid # (al.tid, al.create_time) these_libs = [] for lib in al.modules: if "xml" in lib.name: @@ -90,7 +84,6 @@ libs_for_thread[thread] = [] libs_for_thread[thread].append(these_libs) - threads = list(libs_for_thread.keys()) # ok this is also WRONG. 
# it is assuming the 1st thread is the one you care about (which might be true if you scissored carefully) @@ -98,19 +91,21 @@ n = int(len(libs_for_thread[thread]) / 2) libs = libs_for_thread[thread][n] - tls = {} + + def update_tls(tq): if tq.HasField("unique_label_set"): uls = tq.unique_label_set tls[uls.ptr] = set([]) for l in uls.label: tls[uls.ptr].add(l) - + + def get_module_offset(pc): for lib in libs: if pc >= lib.base_addr and pc < (lib.base_addr + lib.size): - return (lib.name, pc - lib.base_addr) + return lib.name, pc - lib.base_addr return None @@ -126,8 +121,7 @@ def get_src_line(pc): if not (outp == "??:0"): return outp.decode().rstrip() return None - - + def get_fn_offset(a2s, mod_offs): last_possible = None @@ -138,14 +132,14 @@ def get_fn_offset(a2s, mod_offs): break return last_possible + tis = set([]) last = None - num_opportunities = 0 label_liveness = {} with PLogReader(sys.argv[1]) as plr: - for i,m in enumerate(plr): + for i, m in enumerate(plr): if m.HasField("tainted_branch"): tb = m.tainted_branch for tq in tb.taint_query: @@ -155,7 +149,7 @@ def get_fn_offset(a2s, mod_offs): label_liveness[l] += 1 if m.HasField("tainted_instr"): ti = m.tainted_instr - num_copies=0 + num_copies = 0 labels = set([]) for tq in ti.taint_query: update_tls(tq) @@ -171,10 +165,9 @@ def get_fn_offset(a2s, mod_offs): outp += " -- labels [" + (str(labels)) + "]" ml = 0 for l in labels: - ml = max(ml,label_liveness[l]) + ml = max(ml, label_liveness[l]) outp += " -- ml=%d" % ml - print ("trace: instr=%d pc=%x -- %s" % (m.instr, m.pc, outp)) + print("trace: instr=%d pc=%x -- %s" % (m.instr, m.pc, outp)) last = outp print("total of %d injection opportunities" % num_opportunities) - diff --git a/scripts/patch-sources.py b/scripts/patch-sources.py deleted file mode 100644 index 4f8d7da4..00000000 --- a/scripts/patch-sources.py +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/python -import os -import sys -import shutil -import tempfile - -def modify_sources_list(): - def 
replacer_fn(line): - return line.replace("# deb-src", "deb-src").replace("#deb-src", "deb-src") - - t = tempfile.mktemp() - with open("/etc/apt/sources.list") as in_file: - with open(t, "w") as out_file: - new_lines = map(replacer_fn, (line for line in in_file)) - out_file.write("\n".join(new_lines)) - - shutil.copy(t, "/etc/apt/sources.list") - os.remove(t) - -if __name__ == "__main__": - if os.getuid() != 0: - print "Must run {} as sudo or root".format(sys.argv[0]) - exit(1) - modify_sources_list() diff --git a/scripts/process_compile_commands.py b/scripts/process_compile_commands.py index d608c0d3..7b62640e 100644 --- a/scripts/process_compile_commands.py +++ b/scripts/process_compile_commands.py @@ -2,41 +2,41 @@ import os from os.path import join + # processes compile_commands.json to remove duplicate entries and add extra entries def process_compile_commands(cc_filename, extra_cc_filename): print('Processing compile_commands') - cc_file = open(cc_filename, 'r') - extra_cc_file = None - if os.path.isfile(extra_cc_filename): - extra_cc_file = open(extra_cc_filename, 'r') - compile_commands = json.load(cc_file) + with open(cc_filename, 'r') as cc_file: + compile_commands = json.load(cc_file) + file_set = set() new_compile_commands = [] for f in compile_commands: if join(f['directory'], f['file']) not in file_set: file_set.add(join(f['directory'], f['file'])) new_compile_commands.append(f) - if extra_cc_file: - extra_compile_commands = json.load(extra_cc_file) + + if os.path.isfile(extra_cc_filename): + with open(extra_cc_filename, 'r') as extra_cc_file: + extra_compile_commands = json.load(extra_cc_file) + for f in extra_compile_commands: new_compile_commands.append(f) - extra_cc_file.close() - cc_file.close() - cc_file = open(cc_filename, 'w') - json.dump(new_compile_commands, cc_file) - cc_file.close() + + with open(cc_filename, 'w') as cc_file: + json.dump(new_compile_commands, cc_file) + def get_c_files(bugs_build, cc_filename): - cc_file = open(cc_filename, 
'r') - compile_commands = json.load(cc_file) + with open(cc_filename, 'r') as cc_file: + compile_commands = json.load(cc_file) + c_files = set() for f in compile_commands: if not (bugs_build == f['directory']): - c_files.add(os.path.join( - os.path.basename(f['directory']), - f['file'])) + c_files.add(os.path.join(os.path.basename(f['directory']), f['file'])) else: c_files.add(f['file']) - cc_file.close() + return c_files diff --git a/scripts/rdf.py b/scripts/rdf.py index 90dd1c8e..c6f99a11 100644 --- a/scripts/rdf.py +++ b/scripts/rdf.py @@ -1,4 +1,3 @@ - # Input is a bug mining log file. We will determine total # of instr # and proceed to normalize every rdf report in the log by dividing by @@ -16,8 +15,4 @@ if foo: i1 = float(int(foo.groups()[0])) / total_instr i2 = float(int(foo.groups()[1])) / total_instr - print "%.4f %.4f %.4f" % (i1, i2, i1/i2) - - - - + print("%.4f %.4f %.4f" % (i1, i2, i1 / i2)) diff --git a/scripts/replace_macros.py b/scripts/replace_macros.py index c5f6bf40..65949bd9 100644 --- a/scripts/replace_macros.py +++ b/scripts/replace_macros.py @@ -1,25 +1,26 @@ import sys -import os import shutil + def find_end(line, start_idx_off): open_parens = 1 end_idx = 0 for idx, char in enumerate(line[start_idx_off:]): if char == "(": - open_parens+=1 + open_parens += 1 elif char == ")": - open_parens-=1 + open_parens -= 1 - if open_parens == 0: # At end of lavalog + if open_parens == 0: # At the end of lava log end_idx = idx break return end_idx + def cleanup(line): while "LAVALOG(" in line: - start_idx = line.index("LAVALOG(") # start of LAVALOG - start_idx_off = start_idx + len("LAVALOG(") # After the LAVALOG( + start_idx = line.index("LAVALOG(") # start of LAVALOG + start_idx_off = start_idx + len("LAVALOG(") # After the LAVALOG( # asdf *LAVALOG(1234, LAVALOG(1234, value+valu+value, trigger1), trigger2) bsdf # asdf *LAVALOG(1234, value+valu+value, trigger1) bsdf @@ -29,27 +30,28 @@ def cleanup(line): contents = line[start_idx_off:][:end_idx] # Now 
we have A, VAL...VAL, C - first = contents.index(", ")+2 + first = contents.index(", ") + 2 last = contents.rindex(", ") - line = line[:start_idx] + contents[first:last] + line[start_idx_off+end_idx+1:] + line = line[:start_idx] + contents[first:last] + line[start_idx_off + end_idx + 1:] while "DFLOG" in line: - #DFLOG(115, *(const unsigned int *)ubuf); - #data_flow[115] = *(const...; + # DFLOG(115, *(const unsigned int *)ubuf); + # data_flow[115] = *(const...; start_idx = line.index("DFLOG(") start_idx_off = start_idx + len("DFLOG(") end_idx = find_end(line, start_idx_off) contents = line[start_idx_off:][:end_idx] parts = contents.split(", ") - assert(len(parts) == 2) + assert (len(parts) == 2) contents = "data_flow[{}] = {}".format(parts[0], parts[1]) - line = line[:start_idx] + contents + line[start_idx_off+end_idx+1:] + line = line[:start_idx] + contents + line[start_idx_off + end_idx + 1:] return line + lava_macros = ["#ifdef LAVA_LOGGING", "#ifdef FULL_LAVA_LOGGING", "#ifndef LAVALOG", "#ifdef DUA_LOGGING"] for filename in sys.argv[1:]: scratch = "/tmp/scratch.c" @@ -57,7 +59,7 @@ def cleanup(line): lines = infile.readlines() if not (len(lines) > 1 and lines[0] == "#ifdef LAVA_LOGGING\n"): print("{} is not a LAVALOG'd file".format(infile)) - continue # No lavalogging here + continue # No lavalogging here with open(scratch, "w") as outfile: # Skip past our definitions @@ -66,7 +68,7 @@ def cleanup(line): for macro in lava_macros: if macro in line: in_lava_macro = True - break # Break the macro loop, the in_lava_macro bool will continue + break # Break the macro loop, the in_lava_macro bool will continue if in_lava_macro: if "#endif" in line: @@ -78,5 +80,5 @@ def cleanup(line): else: outfile.write(cleanup(line)) - #os.rename(filename, filename+".bak") + # os.rename(filename, filename+".bak") shutil.copy(scratch, filename) diff --git a/scripts/reset_db.sh b/scripts/reset_db.sh new file mode 100644 index 00000000..1048dda1 --- /dev/null +++ b/scripts/reset_db.sh 
@@ -0,0 +1,43 @@ + +# Load lava-functions +. `dirname $0`/funcs.sh +lava=$(dirname $(dirname $(readlink -f "$0"))) + +# defaults +ok=0 +reset=0 +reset_db=0 +add_queries=0 +make=0 +taint=0 +inject=0 +num_trials=0 +kt="" +demo=0 +curtail=0 +ATP_TYPE="" +# default bugtypes +bugtypes="ptr_add,rel_write,malloc_off_by_one" +# default # of bugs to be injected at a time +many=50 + +# This is just a dummy values +project_name="toy" + +. `dirname $0`/vars.sh + +sourcedir="$directory/$name/$source" +bugsdir="$directory/$name/bugs" +logs="$directory/$name/logs" + +RESET_DB() { + lf="$logs/dbwipe.log" + truncate "$lf" + progress "everything" 1 "Resetting lava db -- logging to $lf" + run_remote "$buildhost" "dropdb -U $pguser -h $dbhost $db || true" "$lf" + run_remote "$buildhost" "createdb -U $pguser -h $dbhost $db || true" "$lf" + run_remote "$buildhost" "psql -d $db -h $dbhost -f $lava/tools/lavaODB/generated/lava.sql -U $pguser" "$lf" + run_remote "$buildhost" "echo dbwipe complete" "$lf" +} + +RESET_DB diff --git a/scripts/run-on-fuzzed-input.py b/scripts/run-on-fuzzed-input.py index 43bd6ebe..64de1e32 100755 --- a/scripts/run-on-fuzzed-input.py +++ b/scripts/run-on-fuzzed-input.py @@ -1,18 +1,18 @@ -#!/usr/bin/python +#!/usr/bin/env python3 import argparse import json import lockfile import os import string -import subprocess32 +import subprocess import sys import time import difflib import itertools from colorama import Fore assert itertools -from sqlalchemy.orm import joinedload_all + import re import shutil assert re @@ -63,12 +63,12 @@ def get_suffix(fn): return "." 
+ split[-1] def run(args, **kwargs): - print "run(", " ".join(args), ")" - subprocess32.check_call(args, cwd=bugs_build, + print("run(", " ".join(args), ")") + subprocess.check_call(args, cwd=bugs_build, stdout=sys.stdout, stderr=sys.stderr, **kwargs) def exit_error(msg): - print Fore.RED + msg + Fore.RESET + print(Fore.RED + msg + Fore.RESET) sys.exit(1) # here's how to run the built program @@ -77,7 +77,7 @@ def run_modified_program(install_dir, input_file, timeout, rr=False, rr_dir=""): if rr: cmd = "{} record {}".format(RR, cmd) if debugging: - print cmd + print(cmd) envv = {} lib_path = project['library_path'].format(install_dir=install_dir) envv["LD_LIBRARY_PATH"] = join(install_dir, lib_path) @@ -96,19 +96,19 @@ def run_modified_program(install_dir, input_file, timeout, rr=False, rr_dir=""): if "_RR_TRACE_DIR" in envv else "") libpath_env = "LD_LIBRARY_PATH={}".format(envv["LD_LIBRARY_PATH"]) - print "Could not get return code from rr ps" - print "stdout: {}".format(ps_stdout) - print "stderr: {}".format(ps_stderr) - print "cmd: [{} {} {}]".format(rr_env, libpath_env, cmd) - print "{} ps {}".format(RR, rr_dir) + print("Could not get return code from rr ps") + print("stdout: {}".format(ps_stdout)) + print("stderr: {}".format(ps_stderr)) + print("cmd: [{} {} {}]".format(rr_env, libpath_env, cmd)) + print("{} ps {}".format(RR, rr_dir)) sys.exit(1) - return (rc, outp[1:]) + return rc, outp[1:] else: - return (rc, outp) + return rc, outp def confirm_bug_in_executable(install_dir): cmd = project['command'].format(install_dir=install_dir,input_file="foo") - nm_cmd = ('nm {}').format(cmd.split()[0]) + nm_cmd = 'nm {}'.format(cmd.split()[0]) (exitcode, output) = run_cmd_notimeout(nm_cmd, None, {}) if exitcode != 0: @@ -138,11 +138,11 @@ def rr_get_tick_from_event(rr_trace_dir, event_num): m = TICKS_RE.search(pout) return int(m.groups()[0]) except: - print "RR dumps did not return proper ouput for event" - print "========stdout========" - print pout - print 
"========stderr========" - print perr + print("RR dumps did not return proper ouput for event") + print("========stdout========") + print(pout) + print("========stderr========") + print(perr) sys.exit(1) def get_atp_line(bug, bugs_build): @@ -200,18 +200,18 @@ def do_function(inp): # print "retval = %d" % rv # print "output: [{}]".format(" ;".join(outp)) if args.compareToQueriesBuild: - print "DIFFING . . .", + print("DIFFING . . .") (orig_rv, orig_outp) = run_modified_program(queries_install, fuzzed_input, timeout) diff = list(difflib.ndiff(orig_outp, outp)) if (len(diff) < 2 or not any(map(lambda line: line[0] in ["+", "-"], diff))): - print "SAME!" + print("SAME!") elif all(map(lambda line: line == "", outp)): - print "Inject Build Has No Output - CANCELING" + print("Inject Build Has No Output - CANCELING") pass else: - print "DIFFERENT" - print "".join(diff), + print("DIFFERENT") + print("".join(diff)) # We could try to figure out how to update the DB with the exit code for the # input # if UPDATE_DB: @@ -245,14 +245,14 @@ def do_function(inp): before_tick = rr_get_tick_from_event(rr_trace_dir, before) count = after_tick - before_tick except StopIteration: - print "\"Instruction Count = \" was not in gdb output" + print("\"Instruction Count = \" was not in gdb output") cmd = project['command'].format(install_dir=bugs_install,input_file=fuzzed_input) - print "======gdb out======" - print "\n".join(out) - print "======end gdb out======" - print "Bug_id {} failed on KT:{}".format(bug.id, knobSize) - print "cmd: [{} {} replay {}]".format(lib_prefix, RR, cmd) - print "rr cmd: [{}]".format(full_cmd) + print("======gdb out======") + print("\n".join(out)) + print("======end gdb out======") + print("Bug_id {} failed on KT:{}".format(bug.id, knobSize)) + print("cmd: [{} {} replay {}]".format(lib_prefix, RR, cmd)) + print("rr cmd: [{}]".format(full_cmd)) sys.exit(1) # count = -1 # os.system(full_cmd) @@ -267,24 +267,24 @@ def do_function(inp): full_cmd = 
"LD_LIBRARY_PATH={} {}".format(lib_path, gdb_cmd) (rc, (out, err)) = run_cmd(gdb_cmd, bugs_install, envv, 10000) # shell=True) if VERBOSE: - print out.split("\n")[-2], err + print(out.split("\n")[-2], err) else: prediction = "{}:{}".format(basename(bug.atp.loc_filename), get_atp_line(bug, bugs_build)) - print "Prediction {}".format(prediction) + print("Prediction {}".format(prediction)) for line in out.split("\n"): if line.startswith("#0"): actual = line.split(" at ")[1] if actual != prediction: - print "Actual {}".format(actual) - print "DIVERGENCE. Exiting . . ." + print("Actual {}".format(actual)) + print("DIVERGENCE. Exiting . . .") sys.exit(1) break else: count = -1 - return (bug.id, knobSize, rv == -6 or rv == -11, count) + return bug.id, knobSize, rv == -6 or rv == -11, count if __name__ == "__main__": @@ -322,7 +322,7 @@ def do_function(inp): if not checkKnobRangeExpression(args.knobTrigger): exit_error("--knobTrigger: \"{}\" is not valid python range expression".format(args.knobRange)) knobRange = sorted(list(set(eval(args.knobTrigger)))) - print "Testing {} inputs for knob offsets in range: {}".format(len(knobRange), knobRange) + print("Testing {} inputs for knob offsets in range: {}".format(len(knobRange), knobRange)) KT = True else: KT = False @@ -363,29 +363,30 @@ def do_function(inp): bugs_parent = join(candidate_path) bugs_lock = lock except lockfile.AlreadyLocked: - print "Can\'t acquire lock on bug folder" + print("Can\'t acquire lock on bug folder") bugs_parent = "" sys.exit(1) candidate += 1 - print "Using dir", bugs_parent + print("Using dir", bugs_parent) - if (not args.noLock): + if not args.noLock: # release bug lock. 
who cares if another process # could theoretically modify this directory bugs_lock.release() - # atexit.register(bugs_lock.release) + # at exit.register(bugs_lock.release) # for sig in [signal.SIGINT, signal.SIGTERM]: # signal.signal(sig, lambda s, f: sys.exit(-1)) try: os.mkdir(bugs_parent) - except: pass + except: + pass if 'source_root' in project: source_root = project['source_root'] else: - tar_files = subprocess32.check_output(['tar', 'tf', project['tarfile']], stderr=sys.stderr) + tar_files = subprocess.check_output(['tar', 'tf', project['tarfile']], stderr=sys.stderr) source_root = tar_files.splitlines()[0].split(os.path.sep)[0] queries_build = join(top_dir, source_root) @@ -441,21 +442,21 @@ def do_function(inp): os.mkdir(RR_TRACES_TOP_DIR) try: # build succeeded -- testing - print "------------\n" + print("------------\n") # first, try the original file - print "TESTING -- ORIG INPUT" + print("TESTING -- ORIG INPUT") orig_input = join(top_dir, 'inputs', basename(bug.trigger.dua.inputfile)) (rv, outp) = run_modified_program(bugs_install, orig_input, timeout) - if rv != args.exitCode - print "***** buggy program fails on original input!" 
+ if rv != args.exitCode: + print("***** buggy program fails on original input!") assert False else: - print "buggy program succeeds on original input" - print "retval = %d" % rv - print "SUCCESS" + print("buggy program succeeds on original input") + print("retval = %d" % rv) + print("SUCCESS") # second, fuzz it with the magic value - print "TESTING -- FUZZED INPUTS" + print("TESTING -- FUZZED INPUTS") # iterate through knob range or just a list of one element # start 4 worker processes @@ -471,17 +472,17 @@ def do_function(inp): # print "({},{},{},{})".format(bug_id, ks, is_valid, step_size) ################# multiprocessing solution ################### for inp in itertools.product(knobSize_iter, bugs_to_inject): - print "==================================================" + print("==================================================") out_data = do_function(inp) (bug_id, ks, is_valid, step_size) = out_data - print "({},{},{},{})".format(bug_id, ks, is_valid, step_size) + print("({},{},{},{})".format(bug_id, ks, is_valid, step_size)) # if UPDATE_DB: db.session.commit() # NB: at the end of testing, the fuzzed input is still in place # if you want to try it ################################################################## except Exception as e: - print "TESTING FAIL" + print("TESTING FAIL") raise - print "inject complete %.2f seconds" % (time.time() - start_time) + print("inject complete %.2f seconds" % (time.time() - start_time)) diff --git a/scripts/setup_postgres.sh b/scripts/setup_postgres.sh new file mode 100755 index 00000000..e5be0396 --- /dev/null +++ b/scripts/setup_postgres.sh @@ -0,0 +1,46 @@ +#!/bin/bash +set -ex + +# shellcheck disable=SC2034 +sudo="" +if [ $EUID -ne 0 ]; then + SUDO=sudo +fi + +PGPASS="${HOME}/.pgpass" +PG_VERSION=$(psql --version | awk '{print $3}' | cut -d '.' -f 1) + +if [ ! 
-f "${PGPASS}" ]; then + pg_hba="/etc/postgresql/${PG_VERSION}/main/pg_hba.conf" + postgres_password='postgrespostgres' + + $SUDO sed -i.bak -E 's/^(local\s+all\s+postgres\s+)md5$/\1peer/' "${pg_hba}" + $SUDO service postgresql reload + + password_sql="ALTER USER postgres WITH PASSWORD '${postgres_password}';" + $SUDO -u postgres psql -c "${password_sql}" + + echo "*:*:*:postgres:${postgres_password}" > "${PGPASS}" + chmod 600 "${PGPASS}" + + $SUDO sed -i.bak -E 's/^(local\s+all\s+postgres\s+)peer$/\1md5/' "${pg_hba}" + $SUDO service postgresql reload +fi + +# Define the PostgreSQL version + + +# Define the configuration file paths +PG_CONF="/etc/postgresql/${PG_VERSION}/main/postgresql.conf" +PG_HBA="/etc/postgresql/${PG_VERSION}/main/pg_hba.conf" + +# Update listen_addresses and password_encryption in postgresql.conf +$SUDO sed -i "s/#listen_addresses = 'localhost'/listen_addresses = '0.0.0.0, localhost'/g" $PG_CONF +$SUDO sed -i "s/#password_encryption = scram-sha-256/password_encryption = md5/g" $PG_CONF + +# Update pg_hba.conf +$SUDO echo "host all all 0.0.0.0/0 md5" >> $PG_HBA +$SUDO sed -i 's/scram-sha-256/md5/g' $PG_HBA + +# Restart PostgreSQL service +$SUDO service postgresql restart diff --git a/scripts/shell.sh b/scripts/shell.sh index bfa9d271..e4ad731b 100755 --- a/scripts/shell.sh +++ b/scripts/shell.sh @@ -21,6 +21,7 @@ fi #docker_map_args="$docker_map_args -v $pb_head_dir:$pb_head_dir -v $google_head_dir:$google_head_dir" command=bash +DOCKER_IP=$(ifconfig docker0 | grep 'inet ' | awk '{print $2}') docker run --rm -it \ -e "HTTP_PROXY=$HTTP_PROXY" \ @@ -34,7 +35,7 @@ docker run --rm -it \ -v /etc/shadow:/etc/shadow:ro \ -v /etc/gshadow:/etc/gshadow:ro \ -v /home:/home:ro \ - --add-host=database:172.17.0.1 \ + --add-host=database:$DOCKER_IP \ $docker_map_args \ - $1 sh -c "trap '' PIPE; su -l $(whoami) -c \"export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/llvm-3.6.2/Release/lib; $command\"" \ - #$1 sh -c "export 
LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/llvm-3.6.2/Release/lib; bash" \ + $1 sh -c "trap '' PIPE; su -l $(whoami) -c \"export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/lib/llvm-11/lib; $command\"" \ + diff --git a/scripts/signal_analysis_gdb.py b/scripts/signal_analysis_gdb.py index 8795d395..edaa8910 100644 --- a/scripts/signal_analysis_gdb.py +++ b/scripts/signal_analysis_gdb.py @@ -1,9 +1,11 @@ import sys import os import re -from threading import Thread + + try: from IPython.kernel.zmq.kernelapp import IPKernelApp + ZMQ = True except: # had trouble finding kernel.zmq. Try: @@ -32,23 +34,26 @@ re.MULTILINE) event_regex = re.compile(".*?Current event: ([0-9]+).*", re.MULTILINE) + def get_instr_count(): data = gdb.execute("info record", to_string=True) m = re.search(record_regex, data) - if m == None: + if m is None: print("coulnd't find instruction count in [info record] command") print(data) return int(m.groups()[0]) + def get_event_count(): data = gdb.execute("when", to_string=True) m = re.search(event_regex, data) - if m == None: + if m is None: print("coulnd't find event count in when command") print(data) assert 0 return int(m.groups()[0]) + # bp_num is int def launch_debug_using_ipython(): # run this from a gdb session in order to create a @@ -82,6 +87,7 @@ def launch_debug_using_ipython(): of the gdb session """ + def get_bp_hits(bp_num): data = gdb.execute("info b {}".format(bp_num), to_string=True) hit_str = "breakpoint already hit " @@ -90,17 +96,19 @@ def get_bp_hits(bp_num): else: return int(data.split(hit_str)[1].split()[0]) + EXIT_LOC = "exit" + class ATP_Breakpoint(gdb.Breakpoint): def stop(self): global SIG_EVENT_COUNT # if (gdb.execute("info record", to_string=True) == - # "No record target is currently active.\n"): - # gdb.write("Starting recording process") + # "No record target is currently active.\n"): + # gdb.write("Starting recording process") # else: - # gdb.write("Hit ATP again. Restarting . . 
.") - # gdb.execute("record stop") + # gdb.write("Hit ATP again. Restarting . . .") + # gdb.execute("record stop") # gdb.execute("record full") gdb.execute("when") print("!! Hit ATP !!") @@ -108,7 +116,7 @@ def stop(self): print("Events =", SIG_EVENT_COUNT, get_event_count()) gdb.execute("q") sys.exit(0) - return True + class Exit_Breakpoint(gdb.Breakpoint): def stop(self): @@ -127,31 +135,34 @@ def stop(self): # gdb.execute("q") # sys.exit(1) + # class GdbCommand(): - # # def __init__(self, cmd): - # # self.cmd = cmd - # def (self): - # cmd = self.args[0] - # # gdb.execute(self.cmd) - # # gdb.write("GDB MESSAGE") - # print "HERE HERE" - # print "About to execute: [{}]".format(self.cmd) - # gdb.execute(self.cmd) - -def event_handler (event): - def handle_sig_event (event): +# # def __init__(self, cmd): +# # self.cmd = cmd +# def (self): +# cmd = self.args[0] +# # gdb.execute(self.cmd) +# # gdb.write("GDB MESSAGE") +# print "HERE HERE" +# print "About to execute: [{}]".format(self.cmd) +# gdb.execute(self.cmd) + +def event_handler(event): + def handle_sig_event(event): if isinstance(event, gdb.SignalEvent): if event.stop_signal in ["SIGSEGV", "SIGABRT"]: print("Found a SIG {}".format(event.stop_signal)) print(gdb.execute("p $_siginfo._sifields._sigfault.si_addr", - to_string=True)) + to_string=True)) # print gdb.execute("info proc mappings", to_string=True) gdb.execute("when") # print "Instruction Count = {}".format(get_instr_count()) ATP_Breakpoint(atp_loc) + def print_fn(s): print(s) return True + # gdb.post_event(lambda s: print_fn("hello") and gdb.execute("reverse-continue")) # gdb.post_event(lambda s: print_fn("hello2") and gdb.execute("reverse-continue")) # gdb.post_event(lambda s: print_fn("hello2") and gdb.execute("reverse-continue")) @@ -161,7 +172,7 @@ def print_fn(s): # gdb.execute("set scheduler-locking off") try: global SIG_EVENT_COUNT - if SIG_EVENT_COUNT == None: + if SIG_EVENT_COUNT is None: SIG_EVENT_COUNT = get_event_count() print("SIG_EVENT_COUNT: 
{}".format(SIG_EVENT_COUNT)) gdb.execute("reverse-continue") @@ -180,19 +191,19 @@ def print_fn(s): # generic StopEvent handler. We will assume that we only get here from gdb # def handle_stop_event (event): - # if isinstance(event, gdb.StopEvent): - # global BUG_EFFECT_COUNT - # BUG_EFFECT_COUNT += 1 - # print "HANDLING STOP EVENT: STEP COUNT {}".format(BUG_EFFECT_COUNT) - # if BUG_EFFECT_COUNT > 100000: - # print "Instruction Count = {}".format(BUG_EFFECT_COUNT) - # gdb.execute("q") - # else: - # # gdb.post_event(run_gdb_si) - # print "posting an event a si thread from stop event" - # run_gdb_si() - # # thread = Thread(target=run_gdb_si) - # # thread.start() + # if isinstance(event, gdb.StopEvent): + # global BUG_EFFECT_COUNT + # BUG_EFFECT_COUNT += 1 + # print "HANDLING STOP EVENT: STEP COUNT {}".format(BUG_EFFECT_COUNT) + # if BUG_EFFECT_COUNT > 100000: + # print "Instruction Count = {}".format(BUG_EFFECT_COUNT) + # gdb.execute("q") + # else: + # # gdb.post_event(run_gdb_si) + # print "posting an event a si thread from stop event" + # run_gdb_si() + # # thread = Thread(target=run_gdb_si) + # # thread.start() if isinstance(event, gdb.SignalEvent): handle_sig_event(event) @@ -202,6 +213,7 @@ def print_fn(s): # pass # handle_stop_event(event) + gdb.execute("set breakpoint pending on", to_string=True) gdb.execute("set pagination off", to_string=True) # gdb.execute("set logging on", to_string=True) diff --git a/scripts/stacktrace_gdb.py b/scripts/stacktrace_gdb.py index 81fa4772..670c199d 100644 --- a/scripts/stacktrace_gdb.py +++ b/scripts/stacktrace_gdb.py @@ -1,7 +1,6 @@ import sys -import os -import re -from threading import Thread + + try: from IPython.kernel.zmq.kernelapp import IPKernelApp ZMQ = True @@ -12,11 +11,12 @@ try: import gdb except: - print "Either your gdb is not > gdb 7" - print "Or you are trying to run this without gdb" - print "Exiting . . ." 
+ print("Either your gdb is not > gdb 7") + print("Or you are trying to run this without gdb") + print("Exiting . . .") sys.exit(1) + def launch_debug_using_ipython(): # run this from a gdb session in order to create a # ipython session one could connect to for gdb_python symbol @@ -49,38 +49,41 @@ def launch_debug_using_ipython(): of the gdb session """ + class Exit_Breakpoint(gdb.Breakpoint): def stop(self): ret_data = gdb.execute("info arg", to_string=True) ret_code = int(ret_data.split(" = ")[1]) - print "Program exited normal with status: {}".format(ret_code) + print("Program exited normal with status: {}".format(ret_code)) gdb.execute("q") -def event_handler (event): - def handle_sig_event (event): + +def event_handler(event): + def handle_sig_event(event): if isinstance(event, gdb.SignalEvent): if event.stop_signal in ["SIGSEGV", "SIGABRT"]: - print "Found a SIG {}".format(event.stop_signal) - #print gdb.execute("p $_siginfo._sifields._sigfault.si_addr", + print("Found a SIG {}".format(event.stop_signal)) + # print gdb.execute("p $_siginfo._sifields._sigfault.si_addr", # to_string=True) - #print gdb.execute("info proc mappings", to_string=True) + # print gdb.execute("info proc mappings", to_string=True) gdb.execute("bt") gdb.execute("p/x $eip") gdb.execute("q") else: # print "Instruction Count = {}".format(get_instr_count()) - print "Reached unhandled signal event: {}".format(event.stop_signal) - print "Exiting . . ." + print("Reached unhandled signal event: {}".format(event.stop_signal)) + print("Exiting . . .") gdb.execute("q") if isinstance(event, gdb.SignalEvent): handle_sig_event(event) # assume we get here from beginning of rr thread stop point elif isinstance(event, gdb.StopEvent): - print "Reached unhandled stop event: {}".format(event) - print "Exiting . . ." + print("Reached unhandled stop event: {}".format(event)) + print("Exiting . . 
.") gdb.execute("q") + gdb.execute("set breakpoint pending on", to_string=True) gdb.execute("set pagination off", to_string=True) gdb.execute("set confirm off", to_string=True) diff --git a/scripts/test_crash.py b/scripts/test_crash.py index 7a67779e..0c0c953b 100644 --- a/scripts/test_crash.py +++ b/scripts/test_crash.py @@ -1,7 +1,7 @@ import argparse import json -import subprocess32 -from os import system +import subprocess + def process_crash(buf): """ @@ -11,6 +11,7 @@ def process_crash(buf): returns list of bugids (ints) seen """ bugs = [] + def get_bug_id(line): if len(line.split(":")) > 2: return int(line.split(": ")[1].split(": ")[0]) @@ -24,6 +25,7 @@ def get_bug_id(line): return bugs + def main(args): # Copy built_dir and input_file into /shared # Run sandbox with /shared @@ -32,13 +34,13 @@ def main(args): project = json.loads(args.project.read()) - command = project["command"].format(install_dir=args.install_dir, input_file = args.input) + command = project["command"].format(install_dir=args.install_dir, input_file=args.input) - p = subprocess32.Popen(command, cwd=None, env=None, stdout=subprocess32.PIPE, stderr=subprocess32.PIPE, shell=True) - timeout=10 + p = subprocess.Popen(command, cwd=None, env=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) + timeout = 10 try: - stdout, stderr = p.communicate(timeout=timeout) # returns tuple (stdout, stderr) - except subprocess32.TimeoutExpired: + stdout, stderr = p.communicate(timeout=timeout) # returns tuple (stdout, stderr) + except subprocess.TimeoutExpired: print("Killing process due to timeout expiration.") p.terminate() @@ -63,13 +65,14 @@ def main(args): if __name__ == "__main__": - parser = argparse.ArgumentParser(description='Given an input and a lava-fied binary with LAVA_LOGGING on, determine what bug (if any) is triggered by the input') + parser = argparse.ArgumentParser( + description='Given an input and a lava-fied binary with LAVA_LOGGING on, determine what bug (if any) is 
triggered by the input') parser.add_argument('project', type=argparse.FileType('r'), - help = 'JSON project file') + help='JSON project file') parser.add_argument('install_dir', - help="Install dir") + help="Install dir") parser.add_argument('input', - help="File to input into binary") + help="File to input into binary") args = parser.parse_args() main(args) diff --git a/scripts/vars.py b/scripts/vars.py index f900c2b7..66a69bf3 100644 --- a/scripts/vars.py +++ b/scripts/vars.py @@ -1,8 +1,10 @@ import json import os + class Project: """ Simple getter/setter class so we can support .get like a json file""" + def __init__(self, data): self.values = data @@ -20,15 +22,18 @@ def get(self, field, default): return self.values[field] else: return default + def keys(self): return self.values.keys() + def validate_host(host): # Path to configs assert 'config_dir' in host # path to qemu exec (correct guest) assert 'qemu' in host + def validate_project(project): # name of project assert 'name' in project @@ -43,6 +48,7 @@ def validate_project(project): # namespace in db for prospective bugs assert 'db' in project + def parse_vars(host_json, project_name): with open(host_json, 'r') as host_f: host = json.load(host_f) @@ -65,9 +71,9 @@ def parse_vars(host_json, project_name): except AssertionError as e: print("Your project config file is missing a required field:\n{}".format(e)) raise - + for field, prefix in [("tarfile", "tar_dir"), ("qcow", "qcow_dir")]: - project[field] = host[prefix]+"/"+project[field] + project[field] = host[prefix] + "/" + project[field] for field, suffix in [("db", "db_suffix")]: project[field] = project[field] + host[suffix] @@ -77,28 +83,31 @@ def parse_vars(host_json, project_name): target_val = [] for inp in project["inputs"]: target_val.append("{config_dir}/{name}/{field}".format(config_dir=host["config_dir"], - name=project["name"], field=inp)) + name=project["name"], field=inp)) project["inputs"] = target_val for field in ["injfixupsscript", 
"fixupsscript"]: - if field not in project.keys(): continue + if field not in project.keys(): + continue project[field] = ("{config_dir}/{name}/{field}".format(config_dir=host["config_dir"], - name=project["name"], field=project[field])) + name=project["name"], field=project[field])) project["qemu"] = host["qemu"] project["output_dir"] = host["output_dir"] + "/" + project["name"] project["directory"] = host["output_dir"] - project["config_dir"] = host["config_dir"]+"/" + project["name"] + project["config_dir"] = host["config_dir"] + "/" + project["name"] # Replace format strings in project configs project["install"] = project["install"].format(config_dir=project["config_dir"]) return Project(project) + if __name__ == '__main__': # Basic test import sys import pprint + project = parse_vars(sys.argv[1], sys.argv[2]) pprint.pprint(project.values) project["foo"] = "good_fake_val" diff --git a/scripts/vars.sh b/scripts/vars.sh index e96420c2..a04dfd0c 100644 --- a/scripts/vars.sh +++ b/scripts/vars.sh @@ -25,6 +25,15 @@ output_dir="$(jq -r '.output_dir // ""' $hostjson)" config_dir="$(jq -r '.config_dir // ""' $hostjson)/$project_name" tar_dir="$(jq -r '.tar_dir // ""' $hostjson)" db_suffix="$(jq -r '.db_suffix // ""' $hostjson)" +buildhost="$(jq -r '.buildhost // "localhost"' $hostjson)" +dockername="$(jq -r '.docker // "lava32"' $hostjson)" +pguser="$(jq -r '.pguser // "postgres"' $hostjson)" +pgpass="$(jq -r '.pgpass // "postgrespostgres"' $hostjson)" +dbhost="$(jq -r '.host // "database"' $hostjson)" + +export PGUSER=$pguser +export PGPASS=$pgpass + json="${config_dir}/$project_name.json" if [ ! 
-f $json ]; then @@ -38,6 +47,7 @@ db="$(jq -r .db $json)$db_suffix" extradockerargs="$(jq -r .extra_docker_args $json)" exitCode="$(jq -r .expected_exit_code $json)" dataflow="$(jq -r '.dataflow // "false"' $json)" # TODO use everywhere, stop passing as argument +llvm="/usr/lib/llvm-11" # List of function names to blacklist for data_flow injection, merged as fn1\|fn2\|fn3 so we can use sed # Or an empty string if not present @@ -65,9 +75,6 @@ if [ "$(jq -r .injfixupsscript $json)" != "null" ]; then injfixupsscript="${injfixupsscript/\{bug_build\}/$bug_build}" fi -buildhost="$(jq -r '.buildhost // "docker"' $json)" -pandahost="$(jq -r '.pandahost // "docker"' $json)" -testinghost="$(jq -r '.testinghost // "docker"' $json)" logs="$output_dir/$name/logs" makecmd="$(jq -r .make $json)" @@ -76,10 +83,8 @@ install="${install/\{config_dir\}/$config_dir}" # Format string replacement for post_install="$(jq -r .post_install $json)" install_simple=$(jq -r .install_simple $json) configure_cmd=$(jq -r '.configure // "/bin/true"' $json) -container="$(jq -r '.docker // "lava32"' $json)" # Constants scripts="$lava/scripts" -python="/usr/bin/python" -pdb="/usr/bin/python -m pdb " -dockername="lava32" +python="python3" +pdb="python3 -m pdb " diff --git a/setup.py b/setup.py deleted file mode 100755 index 0f6824fa..00000000 --- a/setup.py +++ /dev/null @@ -1,320 +0,0 @@ -#!/usr/bin/python - -import os -import re -import sys -import stat -import shlex -import getpass -import argparse -import subprocess - -from os.path import join -from os.path import isdir -from os.path import isfile -from os.path import abspath -from os.path import dirname -from os.path import expandvars - -from colorama import Fore -from colorama import Style - -from multiprocessing import cpu_count - - -def command_exited_nonzero(cmd): - try: - with open(os.devnull, 'w') as devnull: - subprocess.check_output(cmd, - shell=True, - stderr=devnull) - return False - except subprocess.CalledProcessError: - return True - 
- -def is_package_installed(pkg): - if pkg == "docker.io": - return os.path.isfile("/usr/bin/docker") - if (os.path.isfile(os.path.join("/usr/bin", pkg)) or - os.path.isfile(os.path.join("/bin", pkg))): - return True - if command_exited_nonzero("dpkg -s {}".format(pkg)): - # maybe it is a python package - try: - python_pkg = pkg.split("python-")[1] - return not command_exited_nonzero("python -c \"import {}\"" - .format(python_pkg)) - # pkg is not a string of "python-{}" - except IndexError: - return False - else: - return True - - -if not is_package_installed("python-colorama"): - subprocess.check_call(['sudo', 'apt-get', - 'install', '-y', 'python-colorama']) - -# this is set to denote user is already in docker group -ALREADY_IN_DOCKER_GROUP = False -LLVM_VERSION = "3.6.2" -DOCKER_NAME = "lava32" - -LAVA_DIR = dirname(abspath(sys.argv[0])) -os.chdir(LAVA_DIR) - -BUILD_DIR = join(os.environ["HOME"], "build") -try: - os.mkdir(BUILD_DIR) -except Exception: - pass - -# try to import lava.mak as a config file -# if not then resort to default locations for llvm and panda -try: - def_lines = (line.strip() for line in open("lava.mak", "r") - if not line.strip().startswith("#") - and line.strip() != "") - def_lines = (line.split(":=") for line in def_lines) - def_lines = ((line[0].strip(), line[1].strip()) for line in def_lines) - LAVA_CONFS = dict(def_lines) - #PANDA_DIR = LAVA_CONFS["PANDA_SRC_PATH"] - #PANDA_DIR = expandvars(PANDA_DIR) - LLVM_DIR = LAVA_CONFS["LLVM_SRC_PATH"] - LLVM_DIR = expandvars(LLVM_DIR) -except Exception: - LLVM_DIR = join(BUILD_DIR, "llvm-" + LLVM_VERSION) - -# Panda must the submodule -PANDA_DIR = abspath(join(LAVA_DIR, "panda/src")) -PANDA_BUILD_DIR = join(PANDA_DIR, '../build') - -# panda/scripts/install_ubuntu.sh -PANDA_UBUNTU = "https://raw.githubusercontent.com/panda-re/panda" \ - "/master/panda/scripts/install_ubuntu.sh" - -# libc6 needed for compiling btrace -# libjsoncpp needed for fbi json parsing -LAVA_DEPS = ["libjsoncpp-dev", 
"postgresql", "jq", "python-psycopg2", - "python-sqlalchemy", "socat", "libpq-dev", "cmake", - "docker.io", "bc", "python-pexpect", "python-psutil", - "python-lockfile", "genisoimage", "inotify-tools", - "build-essential", "python-pip", "libprotobuf-c0-dev", - "libodb-pgsql-2.4", "libfdt-dev"] - -PANDA_MAK = """ -# This is an autogenerated file from lava/setup.py. -PANDA_SRC_PATH := {PANDA_DIR} -PANDA_BUILD_DIR := {PANDA_DIR}/../build -""" -LLVM_MAK = """ -# This is an autogenerated file from lava/setup.py. -LLVM_SRC_PATH := {LLVM_SRC_PATH} -LLVM_BUILD_PATH := {LLVM_BUILD_PATH} -LLVM_BIN_PATH := $(LLVM_BUILD_PATH)/install/bin -""" - - -def progress(msg): - print('') -# PANDA_UBUNTU = "https://goo.gl/GNMNmJ" - print(Fore.GREEN + '[setup.py] ' + Fore.RESET + Style.BRIGHT - + msg + Style.RESET_ALL) - - -def error(msg): - print('') - print(Fore.RED + '[setup.py] ' + Fore.RESET + Style.BRIGHT - + msg + Style.RESET_ALL) - sys.exit(1) - - -def cmd_to_list(cmd): - cmd_args = shlex.split(cmd) if isinstance(cmd, str) else cmd - cmd = subprocess.list2cmdline(cmd_args) - return cmd, cmd_args - - -def run(cmd): - cmd, cmd_args = cmd_to_list(cmd) - try: - progress("Running [{}] . . . 
".format(cmd)) - subprocess.check_call(cmd_args) - except subprocess.CalledProcessError: - error("[{}] cmd did not execute properly.".format(cmd)) - raise - - -def user_in_docker(username): - # grep exits with 0 if pattern found, 1 otherwise - return not command_exited_nonzero("groups {} | grep docker" - .format(username)) - - -DOCKER_MAP_DIRS = [LAVA_DIR, os.environ['HOME']] -DOCKER_MAP_FILES = ['/etc/passwd', '/etc/group', - '/etc/shadow', '/etc/gshadow'] -map_dirs_dedup = [] -# quadratic but who cares -for d in DOCKER_MAP_DIRS: - add = True - for d2 in DOCKER_MAP_DIRS: - if d is not d2 and d.startswith(d2): - add = False - break - if add: - map_dirs_dedup.append(d) - -map_dirs_args = sum([['-v', '{0}:{0}'.format(d)] - for d in map_dirs_dedup], []) -map_files_args = sum([['-v', '{0}:{0}:ro'.format(d)] - for d in DOCKER_MAP_FILES], []) - -ENV_VARS = ['HTTP_PROXY', 'HTTPS_PROXY', 'http_proxy', - 'https_proxy', 'no_proxy'] -env_map = {k: os.environ[k] for k in ENV_VARS if k in os.environ} -env_var_args = sum([['-e', '{}={}'.format(k, v)] - for k, v in env_map.iteritems()], []) -build_args = sum([['--build-arg', '{}={}'.format(k, v)] - for k, v in env_map.iteritems()], []) - -ALREADY_IN_DOCKER_GROUP = user_in_docker(getpass.getuser()) - - -def run_docker(cmd, workdir=None): - cmd, cmd_args = cmd_to_list(cmd) - sudo_args = [] if ALREADY_IN_DOCKER_GROUP else ['sudo'] - if workdir: - cmd = "cd {} && ".format(workdir) + cmd - # Have to be sudo in case we just installed docker - # and don't have the group yet. - cmd_args = sudo_args + ['docker', 'run', '--rm'] + map_dirs_args + \ - map_files_args + env_var_args + \ - [DOCKER_NAME, 'su', '-l', getpass.getuser(), '-c', cmd] - try: - progress("Running in docker [{}] . . . 
".format(cmd)) - print "[{}]".format(" ".join(cmd_args)) - subprocess.check_call(cmd_args) - except subprocess.CalledProcessError: - error("[{}] cmd did not execute properly.") - raise - - -def main(): - parser = argparse.ArgumentParser(description='Setup LAVA') - parser.add_argument('-s', '--skip_docker_build', action='store_true', - default=False, - help='Whether or not to skip building docker image') - parser.add_argument('-f', '--fast', action='store_true', default=False, - help='Whether or not to skip building \ - binutils and glibc') - args = parser.parse_args() - IGNORE_DOCKER = args.skip_docker_build - - progress("In LAVA dir at {}".format(LAVA_DIR)) - # check to make sure we are not running as root/sudo - if os.getuid() == 0: - error("sudo/root privileges detected. \ - Run as user!\nUSAGE: {}".format(sys.argv[0])) - - progress("Installing LAVA apt-get dependencies") - #if not all(map(is_package_installed, LAVA_DEPS)): - # run(['sudo', 'apt-get', '-y', 'install'] + LAVA_DEPS) - - # set up postgres authentication. 
- if not isfile(join(os.environ['HOME'], '.pgpass')): - postgres_depends = subprocess.check_output(['dpkg-query', '-W', '-f', - '${depends}', - 'postgresql']).splitlines() - postgres_pkg = [d for d in postgres_depends - if re.match(r'postgresql-[0-9]+.?[0-9]+', d)][0] - postgres_version = postgres_pkg.replace('postgresql-', '') - pg_hba = "/etc/postgresql/{}/main/pg_hba.conf".format(postgres_version) - postgres_password = 'postgrespostgres' - run(['sudo', 'sed', '-i.bak', '-E', - r's/^(local\s+all\s+postgres\s+)md5$/\1peer/', pg_hba]) - run("sudo service postgresql reload") - password_sql = "ALTER USER postgres WITH PASSWORD '{}';" \ - .format(postgres_password) - run(['sudo', '-u', 'postgres', 'psql', '-c', password_sql]) - pgpass = join(os.environ['HOME'], '.pgpass') - with open(pgpass, 'w') as f: - f.write('*:*:*:postgres:{}'.format(postgres_password)) - os.chmod(pgpass, stat.S_IRUSR | stat.S_IWUSR) - run(['sudo', 'sed', '-i.bak', '-E', - r's/^(local\s+all\s+postgres\s+)peer$/\1md5/', pg_hba]) - run("sudo service postgresql reload") - - # check that user has docker install and docker privileges - progress("Checking if user is in docker group") - if not ALREADY_IN_DOCKER_GROUP: - run(['sudo', 'usermod', '-a', '-G', 'docker', getpass.getuser()]) - - # check that user has the LAVA build docker vm build - # if not run python scripts/build-docker.py - if not IGNORE_DOCKER: - progress("Checking that {} docker is properly built" - .format(DOCKER_NAME)) - sudo_args = [] if ALREADY_IN_DOCKER_GROUP else ['sudo'] - run(sudo_args + ['docker', 'build', '-t', - DOCKER_NAME, join(LAVA_DIR, 'docker')] + build_args) - compile_cmd = ['cd', join(LAVA_DIR, 'tools', 'btrace'), - '&&', 'bash', 'compile.sh'] - run_docker(['bash', '-c', subprocess.list2cmdline(compile_cmd)]) - - progress("Initializing PANDA submodule") - run(['git', 'submodule', 'init']) - run(['git', 'submodule', 'update']) - - # check for location of panda in PANDA_DIR - # make sure that it is PANDA2 - 
progress("Checking for PANDA in " + PANDA_DIR) - if not isdir(PANDA_DIR) or \ - not isfile(join(LAVA_DIR, "tools", "fbi", "panda.mak")) or \ - not isfile(join(PANDA_BUILD_DIR, 'config.log')): - progress("Building PANDA in " + PANDA_BUILD_DIR) - try: - os.makedirs(PANDA_BUILD_DIR) - except OSError: - print "Warning: Panda build directory is already there" - os.chdir(PANDA_DIR) - # The dtc submodule no longer works through git://, so we replace it with https://. - run(['sed', '-i', 's|url = git://git.qemu-project.org/dtc.git|url = https://git.qemu-project.org/dtc.git|g', '.gitmodules']) - # sync the submodule (apply the url protocol change) - run(['git', 'submodule', 'sync']) - run(['git', 'submodule', 'update', '--init', 'dtc']) - os.chdir(PANDA_BUILD_DIR) - run_docker([join(PANDA_DIR, '../setup.sh')]) - os.chdir(LAVA_DIR) - # Compile lavaTool inside the docker container. - progress("Creating $LAVA_DIR/tools/lavaTool/config.mak") - with open("tools/lavaTool/config.mak", "w") as f: - LLVM_DOCKER_DIR = '/llvm-{}'.format(LLVM_VERSION) - f.write(LLVM_MAK.format(LLVM_BUILD_PATH=LLVM_DOCKER_DIR, - LLVM_SRC_PATH=LLVM_DOCKER_DIR)) - os.chdir(LAVA_DIR) - - if not isfile(join(LAVA_DIR, "fbi", "panda.mak")): - progress("Creating $LAVA_DIR/tools/fbi/panda.mak") - with open(join(LAVA_DIR, "tools/fbi/panda.mak"), "w") as f: - f.write(PANDA_MAK.format(PANDA_DIR=PANDA_DIR)) - - if not isfile(join(LAVA_DIR, "lava.mak")): - progress("Creating $LAVA_DIR/lava.mak") - with open("lava.mak", 'w') as f: - f.write(PANDA_MAK.format(PANDA_DIR=PANDA_DIR)) - f.write(LLVM_MAK.format(LLVM_BUILD_PATH=LLVM_DOCKER_DIR, - LLVM_SRC_PATH=LLVM_DOCKER_DIR)) - - # ----------------End .mak file stuff --------------------- - progress("Making each component of lava, fbi and lavaTool") - - os.chdir(LAVA_DIR) - run_docker(['python', 'setup_container.py'], workdir=LAVA_DIR) - - return 0 - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/setup_container.py b/setup_container.py deleted file mode 
100644 index d76d8608..00000000 --- a/setup_container.py +++ /dev/null @@ -1,106 +0,0 @@ -#!/usr/bin/python -import os -import sys -import shlex -import subprocess -from colorama import Fore -from colorama import Style - - -LLVM_VERSION = "3.6.2" - -LAVA_DIR = os.path.dirname(os.path.abspath(sys.argv[0])) -os.chdir(LAVA_DIR) - -PANDA_DIR = os.path.abspath(os.path.join(LAVA_DIR, "panda/src")) -PANDA_BUILD_DIR = os.path.join(PANDA_DIR, '../build') - -PANDA_MAK = """ -# This is an autogenerated file from lava/setup.py. -PANDA_SRC_PATH := {PANDA_DIR} -PANDA_BUILD_DIR := {PANDA_DIR}/../build -""" -LLVM_MAK = """ -# This is an autogenerated file from lava/setup.py. -LLVM_SRC_PATH := {LLVM_SRC_PATH} -LLVM_BUILD_PATH := {LLVM_BUILD_PATH} -LLVM_BIN_PATH := $(LLVM_BUILD_PATH)/install/bin -""" - - -def progress(msg): - print('') -# PANDA_UBUNTU = "https://goo.gl/GNMNmJ" - print(Fore.GREEN + '[setup.py] ' + Fore.RESET + Style.BRIGHT - + msg + Style.RESET_ALL) - -def error(msg): - print('') - print(Fore.RED + '[setup.py] ' + Fore.RESET + Style.BRIGHT - + msg + Style.RESET_ALL) - sys.exit(1) - - -def cmd_to_list(cmd): - cmd_args = shlex.split(cmd) if isinstance(cmd, str) else cmd - cmd = subprocess.list2cmdline(cmd_args) - return cmd, cmd_args - -def run(cmd): - cmd, cmd_args = cmd_to_list(cmd) - try: - progress("Running [{}] . . . ".format(cmd)) - subprocess.check_call(cmd_args) - except subprocess.CalledProcessError: - error("[{}] cmd did not execute properly.".format(cmd)) - raise - -if __name__ == '__main__': - # Compile btrace - compile_cmd = ['cd', os.path.join(LAVA_DIR, 'tools', 'btrace'), - '&&', 'bash', 'compile.sh'] - run(['bash', '-c', subprocess.list2cmdline(compile_cmd)]) - # Compile lavaTool inside the docker container. 
- progress("Creating $LAVA_DIR/tools/lavaTool/config.mak") - with open("tools/lavaTool/config.mak", "w") as f: - LLVM_DOCKER_DIR = '/llvm-{}'.format(LLVM_VERSION) - f.write(LLVM_MAK.format(LLVM_BUILD_PATH=LLVM_DOCKER_DIR, - LLVM_SRC_PATH=LLVM_DOCKER_DIR)) - run(['rm', '-rf', os.path.join(LAVA_DIR, 'tools/build')]) - run(['mkdir', '-p', os.path.join(LAVA_DIR, 'tools/build')]) - run(['mkdir', '-p', os.path.join(LAVA_DIR, 'tools/install')]) - - run(['cmake', '-B{}'.format(os.path.join(LAVA_DIR, 'tools/build')), - '-H{}'.format(os.path.join(LAVA_DIR, 'tools')), - '-DCMAKE_INSTALL_PREFIX={}'.format(os.path.join(LAVA_DIR, - 'tools/install'))]) - run(['make','--no-print-directory','-j4', 'install', '-C', - os.path.join(LAVA_DIR, 'tools/build/lavaTool')]) - - # -----------Beginning .mak file stuff ------------------- - # I think this would be useful, but i'm seperating it out - # in case anyone thinks it's a bad idea - # the idea is that if someone wants llvm and panda installed in certain - # locations, they can make their lava.mak ahead of time - # then setup.py will parse it and configure the environmet to those specs - os.chdir(LAVA_DIR) - - if not os.path.isfile(os.path.join(LAVA_DIR, "fbi", "panda.mak")): - progress("Creating $LAVA_DIR/tools/fbi/panda.mak") - with open(os.path.join(LAVA_DIR, "tools/fbi/panda.mak"), "w") as f: - f.write(PANDA_MAK.format(PANDA_DIR=PANDA_DIR)) - - if not os.path.isfile(os.path.join(LAVA_DIR, "lava.mak")): - progress("Creating $LAVA_DIR/lava.mak") - with open("lava.mak", 'w') as f: - f.write(PANDA_MAK.format(PANDA_DIR=PANDA_DIR)) - f.write(LLVM_MAK.format(LLVM_BUILD_PATH=LLVM_DOCKER_DIR, - LLVM_SRC_PATH=LLVM_DOCKER_DIR)) - - # ----------------End .mak file stuff --------------------- - progress("Making each component of lava, fbi and lavaTool") - progress("Compiling fbi") - - os.chdir(os.path.join(LAVA_DIR, "tools/build")) - run("make --no-print-directory -j4 -C fbi install") - os.chdir(LAVA_DIR) diff --git a/setup_container.sh 
b/setup_container.sh new file mode 100755 index 00000000..509c62e8 --- /dev/null +++ b/setup_container.sh @@ -0,0 +1,36 @@ +#!/bin/bash +set -ex + +progress() { + echo + echo -e "\e[32m[lava_install]\e[0m \e[1m$1\e[0m" +} + +if [ -z "${LLVM_DIR}" ]; then + echo "LLVM_DIR is not set ${LLVM_DIR}, setting it to /usr/lib/llvm-11" + export LLVM_DIR=/usr/lib/llvm-11 +else + echo "LLVM_DIR is set to '${LLVM_DIR}'" +fi + + +LAVA_DIR=$(dirname "$(realpath "$0")") +echo "LAVA_DIR: $LAVA_DIR" + +progress "Compile btrace" +pushd "$LAVA_DIR/tools/btrace" +./compile.sh +popd + +progress "Compiling lavaTool" + +rm -rf "$LAVA_DIR/tools/build" +mkdir -p "$LAVA_DIR/tools/build" +mkdir -p "$LAVA_DIR/tools/install" + +cmake -B"$LAVA_DIR/tools/build" -H"${LAVA_DIR}/tools" -DCMAKE_INSTALL_PREFIX="${LAVA_DIR}/tools/install" +make --no-print-directory -j4 install -C "${LAVA_DIR}/tools/build/lavaTool" + +progress "Compiling fbi" + +make --no-print-directory -j4 install -C "${LAVA_DIR}/tools/build/fbi" diff --git a/setup_postgres.py b/setup_postgres.py deleted file mode 100644 index 8050251f..00000000 --- a/setup_postgres.py +++ /dev/null @@ -1,52 +0,0 @@ -import os -import re -import shlex -import stat -import subprocess - -# Setup postgres. Mostly copied from setup.py - - -def cmd_to_list(cmd): - cmd_args = shlex.split(cmd) if isinstance(cmd, str) else cmd - cmd = subprocess.list2cmdline(cmd_args) - return cmd, cmd_args - - -def run(cmd): - cmd, cmd_args = cmd_to_list(cmd) - try: - print("Running [{}] . . . 
".format(cmd)) - subprocess.check_call(cmd_args) - except subprocess.CalledProcessError: - print("[{}] cmd did not execute properly.".format(cmd)) - raise - - -def main(): - if not os.path.isfile(os.path.join(os.environ['HOME'], '.pgpass')): - postgres_depends = subprocess.check_output(['dpkg-query', '-W', '-f', - '${depends}', - 'postgresql']).splitlines() - postgres_pkg = [d for d in postgres_depends - if re.match(r'postgresql-[0-9]+.?[0-9]+', d)][0] - postgres_version = postgres_pkg.replace('postgresql-', '') - pg_hba = "/etc/postgresql/{}/main/pg_hba.conf".format(postgres_version) - postgres_password = 'postgrespostgres' - run(['sudo', 'sed', '-i.bak', '-E', - r's/^(local\s+all\s+postgres\s+)md5$/\1peer/', pg_hba]) - run("sudo service postgresql reload") - password_sql = "ALTER USER postgres WITH PASSWORD '{}';" \ - .format(postgres_password) - run(['sudo', '-u', 'postgres', 'psql', '-c', password_sql]) - pgpass = os.path.join(os.environ['HOME'], '.pgpass') - with open(pgpass, 'w') as f: - f.write('*:*:*:postgres:{}'.format(postgres_password)) - os.chmod(pgpass, stat.S_IRUSR | stat.S_IWUSR) - run(['sudo', 'sed', '-i.bak', '-E', - r's/^(local\s+all\s+postgres\s+)peer$/\1md5/', pg_hba]) - run("sudo service postgresql reload") - - -if __name__ == "__main__": - main() diff --git a/target_bins/file-5.22-pre.tar.gz b/target_bins/file-5.22-pre.tar.gz index 2553018d..fa8e3b1d 100644 Binary files a/target_bins/file-5.22-pre.tar.gz and b/target_bins/file-5.22-pre.tar.gz differ diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index 94650fc5..4d01d357 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required (VERSION 3.1) +cmake_minimum_required(VERSION 3.15) project (LAVA VERSION 2.0.0) set (CMAKE_CONFIGURATION_TYPES "Release" CACHE STRING "Configs" FORCE) diff --git a/tools/btrace/sw-btrace-to-compiledb b/tools/btrace/sw-btrace-to-compiledb index ff914320..f7b2015b 100755 --- a/tools/btrace/sw-btrace-to-compiledb +++ 
b/tools/btrace/sw-btrace-to-compiledb @@ -1,6 +1,5 @@ -#!/usr/bin/env python -# Python 2 or 3 -- works on 2.6 and up. -from __future__ import absolute_import, print_function, unicode_literals +#!/usr/bin/env python3 + import json import os import sys @@ -175,5 +174,6 @@ def main(): with open("compile_commands.json", "w") as f: json.dump(results, f, indent=4) + if __name__ == "__main__": main() diff --git a/tools/fbi/src/CMakeLists.txt b/tools/fbi/src/CMakeLists.txt index 04e041e3..33e0c9c2 100644 --- a/tools/fbi/src/CMakeLists.txt +++ b/tools/fbi/src/CMakeLists.txt @@ -1,35 +1,16 @@ +cmake_minimum_required(VERSION 3.15) project (FBI LANGUAGES CXX) -# Panda src, headers and build locations -set (PANDA_SRC_PATH ${CMAKE_CURRENT_SOURCE_DIR}/../../../panda/src) -set (PANDA_BUILD_DIR ${PANDA_SRC_PATH}/../build) -set (PANDA_HEADERS ${PANDA_SRC_PATH}/panda/include) - -# fbilib target -add_library (fbilib - ${PANDA_SRC_PATH}/panda/src/plog.c - ${PANDA_SRC_PATH}/panda/src/plog-cc.cpp -) -target_include_directories(fbilib BEFORE - PUBLIC - ${PANDA_SRC_PATH}/panda/include - ${PANDA_BUILD_DIR}/i386-softmmu - /usr/lib/odb/x86_64-linux-gnu/include -) -target_compile_options(fbilib PRIVATE -D_GLIBCXX_USE_CXX11_ABI=0 -DPLOG_READER) -set_property(TARGET fbilib PROPERTY CXX_STANDARD 14) - # fbi target add_executable(fbi find_bug_inj.cpp) -set_property(TARGET fbi PROPERTY CXX_STANDARD 14) +set_property(TARGET fbi PROPERTY CXX_STANDARD 17) -target_compile_options(fbi PRIVATE -D_GLIBCXX_USE_CXX11_ABI=0) -if (${DEBUG}) +#if (${DEBUG}) target_compile_options(fbi PRIVATE -fno-omit-frame-pointer -g -O0) -else() - target_compile_options(fbi PRIVATE -flto -O3) - set_target_properties(fbi PROPERTIES LINK_FLAGS "-flto -fuse-ld=gold") -endif() +#else() +# target_compile_options(fbi PRIVATE -flto -O3) +# set_target_properties(fbi PROPERTIES LINK_FLAGS "-flto -fuse-ld=gold") +#endif() target_include_directories(fbi BEFORE PUBLIC @@ -37,29 +18,27 @@ target_include_directories(fbi BEFORE 
${CMAKE_CURRENT_SOURCE_DIR}/../../lavaODB/generated ${CMAKE_CURRENT_SOURCE_DIR}/../../lavaODB/include ${CMAKE_CURRENT_SOURCE_DIR}/../../include - ${PANDA_SRC_PATH}/panda/include - ${PANDA_BUILD_DIR}/i386-softmmu /usr/lib/odb/x86_64-linux-gnu/include - ) + /usr/local/include + /usr/include/jsoncpp +) + add_dependencies(fbi lava-odb_x64) set_target_properties(fbi PROPERTIES LINK_FLAGS "-L/usr/local/lib") target_link_libraries(fbi - fbilib lavaDB_x64 lava-odb_x64 protobuf-c z odb odb-pgsql - jsoncpp - pq protobuf - ${PANDA_BUILD_DIR}/i386-softmmu/plog.pb.o - ${PANDA_BUILD_DIR}/i386-softmmu/plog.pb-c.o + jsoncpp ) + install (TARGETS fbi RUNTIME DESTINATION bin LIBRARY DESTINATION lib ARCHIVE DESTINATION lib/static OPTIONAL - ) + ) \ No newline at end of file diff --git a/tools/fbi/src/find_bug_inj.cpp b/tools/fbi/src/find_bug_inj.cpp index ded5e806..c4838444 100644 --- a/tools/fbi/src/find_bug_inj.cpp +++ b/tools/fbi/src/find_bug_inj.cpp @@ -18,10 +18,6 @@ extern "C" { #include #include #include - -#include "panda/plog.h" -#include "panda/plog-cc-bridge.h" -#include "panda/plog_print.h" } #include @@ -47,6 +43,7 @@ extern "C" { #include "lava_version.h" #include #include +#include #define CBNO_TCN_BIT 0 #define CBNO_CRD_BIT 1 @@ -235,19 +232,28 @@ std::vector LoadIDB(std::string fn) { return InvertDB(x); } -void update_unique_taint_sets(const Panda__TaintQueryUniqueLabelSet *tquls) { +void update_unique_taint_sets(Json::Value& tquls) { if (debug) { printf("UNIQUE TAINT SET\n"); - spit_tquls(tquls); - printf("\n"); + Json::StyledWriter writer; + std::string jsonString = writer.write(tquls); + // spit_tquls(tquls); + std::cout << jsonString; + printf("\n"); } // maintain mapping from ptr (uint64_t) to actual set of taint labels - Ptr p = tquls->ptr; + Ptr p = std::strtoull(tquls["ptr"].asString().c_str(), 0, 0); auto it = ptr_to_labelset.lower_bound(p); + int max_index = tquls["label"].size() - 1; if (it == ptr_to_labelset.end() || p < it->first) { - const LabelSet *ls = 
create(LabelSet{0, p, inputfile, - std::vector(tquls->label, - tquls->label + tquls->n_label)}); + + std::vector vec; + // Populate contents of vector with that of "label" + for (Json::Value& element : tquls["label"]) { + vec.push_back(std::strtoul(element.asString().c_str(), 0, 0)); + } + + const LabelSet *ls = create(LabelSet{0, p, inputfile, vec}); ptr_to_labelset.insert(it, std::make_pair(p, ls)); auto &labels = ls->labels; @@ -262,6 +268,10 @@ void update_unique_taint_sets(const Panda__TaintQueryUniqueLabelSet *tquls) { bool is_header_file(std::string filename) { uint32_t l = filename.length(); + if (l < 2) { + // Can occur with files like '9', so we just skip them. + return true; + } return (filename[l-2] == '.' && filename[l-1] == 'h'); } @@ -343,7 +353,7 @@ inline Range get_dua_dead_range(const Dua *dua, const std::vector &to_ count_nonzero(viable_bytes)); if (dua->lval->ast_name.find("nodua") != std::string::npos) { dprintf("Found nodua symbol, skipping"); - dprintf(dua->lval->ast_name.c_str()); + dprintf("%s", dua->lval->ast_name.c_str()); dprintf("\n"); Range empty{0, 0}; return empty; @@ -396,23 +406,23 @@ template void record_injectable_bugs_at(const AttackPoint *atp, bool is_new_atp, std::initializer_list extra_duas); -void taint_query_pri(Panda__LogEntry *ple) { - assert (ple != NULL); - Panda__TaintQueryPri *tqh = ple->taint_query_pri; - assert (tqh != NULL); +void taint_query_pri(Json::Value& ple) { + Json::Value tqh = ple["taintQueryPri"]; // size of query in bytes & num tainted bytes found // bdg: don't try handle lvals that are bigger than our max lval - uint32_t len = std::min(tqh->len, max_lval); - uint32_t num_tainted = tqh->num_tainted; + uint32_t len = std::min((uint32_t) std::strtoul(tqh["len"].asString().c_str(), 0, 0), max_lval); + uint32_t num_tainted = std::strtoul(tqh["numTainted"].asString().c_str(), 0, 0); // entry 1 is source info - Panda__SrcInfoPri *si = tqh->src_info; + Json::Value si = tqh["srcInfo"]; // ignore duas in header 
files - if (is_header_file(std::string(si->filename))) return; - assert (si != NULL); + if (is_header_file(std::string(si["filename"].asString()))) { + return; + } + // entry 2 is callstack -- ignore - Panda__CallStack *cs = tqh->call_stack; - assert (cs != NULL); - uint64_t instr = ple->instr; + Json::Value cs = tqh["callStack"]; + + uint64_t instr = std::strtoull(ple["instr"].asString().c_str(), 0, 0); dprintf("TAINT QUERY HYPERCALL len=%d num_tainted=%d\n", len, num_tainted); // collects set (as sorted vec) of labels on all viable bytes @@ -427,11 +437,10 @@ void taint_query_pri(Panda__LogEntry *ple) { // collect "ok" bytes, which have low enough taint compute num and card, // and also aren't tainted by too-live input bytes // go through and deal with new unique taint sets first - for (uint32_t i=0; in_taint_query; i++) { - Panda__TaintQuery *tq = tqh->taint_query[i]; - if (tq->unique_label_set) { + for (Json::Value &tq: tqh["taintQuery"]) { + if (tq.isMember("uniqueLabelSet")) { // collect new unique taint label sets - update_unique_taint_sets(tq->unique_label_set); + update_unique_taint_sets(tq["uniqueLabelSet"]); } } @@ -443,27 +452,27 @@ void taint_query_pri(Panda__LogEntry *ple) { std::vector viable_byte(len, nullptr); std::vector byte_tcn(len, 0); - dprintf("considering taint queries on %lu bytes\n", tqh->n_taint_query); + dprintf("considering taint queries on %llu bytes\n", std::strtoull(tqh["num_tainted"].asString().c_str(), 0, 0)); bool is_dua = false; bool is_fake_dua = false; uint32_t num_viable_bytes = 0; // optimization. don't need to check each byte if we don't have enough. 
if (num_tainted >= LAVA_MAGIC_VALUE_SIZE) { - for (uint32_t i = 0; i < tqh->n_taint_query; i++) { - Panda__TaintQuery *tq = tqh->taint_query[i]; - uint32_t offset = tq->offset; - if (offset >= len) continue; - dprintf("considering offset = %d\n", offset); - const LabelSet *ls = ptr_to_labelset.at(tq->ptr); - - byte_tcn[offset] = tq->tcn; + for (const auto& tq : tqh["taintQuery"]) { + uint32_t offset = std::strtoul(tq["offset"].asString().c_str(), 0, 0); + if (offset >= len) { + continue; + } + dprintf("considering offset = %d\n", offset); + const LabelSet *ls = ptr_to_labelset.at(std::strtoull(tq["ptr"].asString().c_str(), 0, 0)); + byte_tcn[offset] = std::strtoul(tq["tcn"].asString().c_str(), 0, 0); // flag for tracking *why* we discarded a byte // check tcn and cardinality of taint set first uint32_t current_byte_not_ok = 0; - current_byte_not_ok |= (tq->tcn > max_tcn) << CBNO_TCN_BIT; - current_byte_not_ok |= (ls->labels.size() > max_card) << CBNO_CRD_BIT; + current_byte_not_ok |= (std::strtoul(tq["tcn"].asString().c_str(), 0, 0)) > max_tcn << CBNO_TCN_BIT; + current_byte_not_ok |= (ls->labels.size() > max_card) << CBNO_CRD_BIT; if (current_byte_not_ok && debug) { // discard this byte dprintf("discarding byte -- here's why: %x\n", current_byte_not_ok); @@ -475,7 +484,7 @@ void taint_query_pri(Panda__LogEntry *ple) { dprintf("retaining byte\n"); // this byte is ok to retain. 
// keep track of highest tcn, liveness, and card for any viable byte for this lval - c_max_tcn = std::max(tq->tcn, c_max_tcn); + c_max_tcn = std::max((uint32_t) std::strtoul(tq["tcn"].asString().c_str(), 0, 0), c_max_tcn); c_max_card = std::max((uint32_t) ls->labels.size(), c_max_card); merge_into(ls->labels.begin(), ls->labels.end(), all_labels); @@ -502,23 +511,18 @@ void taint_query_pri(Panda__LogEntry *ple) { // create a fake dua if we can if (chaff_bugs && !is_dua - && tqh->len - num_tainted >= LAVA_MAGIC_VALUE_SIZE) { + && std::strtoul(tqh["len"].asString().c_str(), 0, 0) - num_tainted >= LAVA_MAGIC_VALUE_SIZE) { dprintf("not enough taint -- what about non-taint?\n"); dprintf("len=%d num_tainted=%d\n", len, num_tainted); viable_byte.assign(viable_byte.size(), nullptr); uint32_t count = 0; - Panda__TaintQuery **tqp = tqh->taint_query; - Panda__TaintQuery **tqp_end = tqp + tqh->n_taint_query; - for (uint32_t i = 0; i < viable_byte.size(); i++) { + uint32_t i = 0; + // TODO: I suspect issues will arise here... + for (Json::Value& tq : tqh["taintQuery"]) { // Assume these are sorted by offset. // Keep two iterators, one in viable_byte, one in tqh->taint_query. // Iterate over both and fill gaps in tqh into viable_byte. - if (tqp && tqp < tqp_end && (*tqp)->offset < i) { - tqp++; - } - Panda__TaintQuery *tq = (tqp && tqp < tqp_end) ? *tqp : nullptr; - assert(!tq || tq->offset >= i); - if (!tq || tq->offset > i || !tq->ptr) { + if (std::strtoul(tq["offset"].asString().c_str(), 0, 0) > i) { // if untainted, we can guarantee that we can use the untainted // bytes to produce a bug that definitely won't trigger. // so we create a fake, empty labelset. 
@@ -530,7 +534,10 @@ void taint_query_pri(Panda__LogEntry *ple) { viable_byte[i] = fake_ls; count++; } - if (count >= LAVA_MAGIC_VALUE_SIZE) break; + ++i; + if (count >= LAVA_MAGIC_VALUE_SIZE) { + break; + } } assert(count >= LAVA_MAGIC_VALUE_SIZE); is_fake_dua = true; @@ -541,16 +548,16 @@ void taint_query_pri(Panda__LogEntry *ple) { if (is_dua || is_fake_dua) { // looks like we can subvert this for either real or fake bug. // NB: we don't know liveness info yet. defer byte selection until later. - assert(si->has_ast_loc_id); - LavaASTLoc ast_loc(ind2str[si->ast_loc_id]); + assert(si.isMember("astLocId")); + unsigned long ast_loc_id = std::strtoul(si["astLocId"].asString().c_str(), 0, 0); + LavaASTLoc ast_loc(ind2str[ast_loc_id]); assert(ast_loc.filename.size() > 0); - const SourceLval *lval = create(SourceLval{0, - ast_loc, si->astnodename, len}); + const SourceLval *lval = create(SourceLval{0, ast_loc, si["astnodename"].asString(), len}); const Dua *dua = create(Dua(lval, std::move(viable_byte), std::move(byte_tcn), std::move(all_labels), inputfile, - c_max_tcn, c_max_card, ple->instr, is_fake_dua)); + c_max_tcn, c_max_card, std::strtoull(ple["instr"].asString().c_str(), 0, 0), is_fake_dua)); if (is_dua) { // Only track liveness for non-fake duas. @@ -576,7 +583,7 @@ void taint_query_pri(Panda__LogEntry *ple) { // Update recent_dead_duas + recent_duas_by_instr: // 1) erase at most one in r_d_by_instr w/ same lval_id. // 2) insert/update in recent_dead_duas - // 2) insert new dua into r_d_by_instr, probably at end. + // 3) insert new dua into r_d_by_instr, probably at end. 
unsigned long lval_id = lval->id; auto it_lval = recent_dead_duas.lower_bound(lval_id); if (it_lval == recent_dead_duas.end() || lval_id < it_lval->first) { @@ -616,35 +623,33 @@ void taint_query_pri(Panda__LogEntry *ple) { if (is_dua) num_real_duas++; if (is_fake_dua) num_fake_duas++; } else { - dprintf("discarded %u viable bytes %lu labels %s:%u %s", - num_viable_bytes, all_labels.size(), si->filename, si->linenum, - si->astnodename); + dprintf("discarded %u viable bytes %lu labels %s:%lu %s", + num_viable_bytes, all_labels.size(), si["filename"].asString().c_str(), + std::strtoul(si["linenum"].asString().c_str(), 0, 0), + si["astnodename"].asString().c_str()); } t.commit(); } // update liveness measure for each of taint labels (file bytes) associated with a byte in lval that was queried -void update_liveness(Panda__LogEntry *ple) { - assert (ple != NULL); - Panda__TaintedBranch *tb = ple->tainted_branch; - assert (tb != NULL); +void update_liveness(const Json::Value& ple) { + Json::Value tb = ple["taintedBranch"]; dprintf("TAINTED BRANCH\n"); transaction t(db->begin()); std::vector all_labels; - for (uint32_t i=0; in_taint_query; i++) { - Panda__TaintQuery *tq = tb->taint_query[i]; + for (Json::Value& tq: tb["taintQuery"]) { assert (tq); - if (tq->unique_label_set) { + if (tq.isMember("uniqueLabelSet")) { // keep track of unique taint label sets - update_unique_taint_sets(tq->unique_label_set); + update_unique_taint_sets(tq["uniqueLabelSet"]); } // if (debug) { spit_tq(tq); printf("\n"); } // This should be O(mn) for m sets, n elems each. // though we should have n >> m in our worst case. 
const std::vector &cur_labels = - ptr_to_labelset.at(tq->ptr)->labels; + ptr_to_labelset.at(std::strtoul(tq["ptr"].asString().c_str(), 0, 0)) -> labels; merge_into(cur_labels.begin(), cur_labels.end(), all_labels); } t.commit(); @@ -857,18 +862,20 @@ void record_injectable_bugs_at(const AttackPoint *atp, bool is_new_atp, } } -void attack_point_lval_usage(Panda__LogEntry *ple) { - assert (ple != NULL); - Panda__AttackPoint *pleatp = ple->attack_point; - if (pleatp->src_info->has_ast_loc_id) - dprintf ("attack point id = %d\n", pleatp->src_info->ast_loc_id); +void attack_point_lval_usage(Json::Value ple) { + Json::Value pleatp = ple["attackPoint"]; + unsigned long ast_id; - assert (pleatp != NULL); - Panda__SrcInfo *si = pleatp->src_info; + if (pleatp["srcInfo"].isMember("astLocId")) { + ast_id = std::strtoul(pleatp["srcInfo"]["astLocId"].asString().c_str(), 0, 0); + dprintf ("attack point id = %lu\n", ast_id); + } + Json::Value si = pleatp["srcInfo"]; // ignore duas in header files - if (is_header_file(ind2str[si->filename])) return; - - assert (si != NULL); + if (is_header_file(si["filename"].asString())) { + return; + } + // assert (si.isMember("srcInfo"); dprintf("ATTACK POINT\n"); if (recent_dead_duas.size() == 0) { dprintf("no duas yet -- discarding attack point\n"); @@ -876,18 +883,18 @@ void attack_point_lval_usage(Panda__LogEntry *ple) { } dprintf("%lu viable duas remain\n", recent_dead_duas.size()); - assert(si->has_ast_loc_id); - LavaASTLoc ast_loc(ind2str[si->ast_loc_id]); + assert(si.isMember("astLocId")); + LavaASTLoc ast_loc(ind2str[ast_id]); assert(ast_loc.filename.size() > 0); transaction t(db->begin()); const AttackPoint *atp; bool is_new_atp; std::tie(atp, is_new_atp) = create_full(AttackPoint{0, - ast_loc, (AttackPoint::Type)pleatp->info}); + ast_loc, (AttackPoint::Type) std::strtoul(pleatp["info"].asString().c_str(), 0, 0)}); dprintf("@ATP: %s\n", std::string(*atp).c_str()); // Don't decimate PTR_ADD bugs. 
- switch ((AttackPoint::Type)pleatp->info) { + switch ((AttackPoint::Type) std::strtoul(pleatp["info"].asString().c_str(), 0, 0)) { case AttackPoint::POINTER_WRITE: record_injectable_bugs_at(atp, is_new_atp, { }); // fall through @@ -905,9 +912,9 @@ void attack_point_lval_usage(Panda__LogEntry *ple) { t.commit(); } -void record_call(Panda__LogEntry *ple) { } +void record_call(Json::Value ple) { } -void record_ret(Panda__LogEntry *ple) { } +void record_ret(Json::Value ple) { } int main (int argc, char **argv) { if (argc != 5 && argc !=6 ) { @@ -961,7 +968,7 @@ int main (int argc, char **argv) { if (!project["max_liveness"].isUInt()) { throw std::runtime_error("Could not parse max_liveness"); } - max_liveness = project["max_liveness"].asUInt(); + max_liveness = std::strtoul(project["max_liveness"].asString().c_str(), 0, 0); printf("maximum liveness score of %lu\n", max_liveness); if (!project.isMember("max_cardinality")) { @@ -971,7 +978,7 @@ int main (int argc, char **argv) { if (!project["max_cardinality"].isUInt()) { throw std::runtime_error("Could not parse max_cardinality"); } - max_card = project["max_cardinality"].asUInt(); + max_card = std::strtoul(project["max_cardinality"].asString().c_str(), 0, 0); printf("max card of taint set returned by query = %d\n", max_card); if (!project.isMember("max_tcn")) { @@ -981,7 +988,7 @@ int main (int argc, char **argv) { if (!project["max_tcn"].isUInt()) { throw std::runtime_error("Could not parse max_tcn"); } - max_tcn = project["max_tcn"].asUInt(); + max_tcn = std::strtoul(project["max_tcn"].asString().c_str(), 0, 0); printf("max tcn for addr = %d\n", max_tcn); if (!project.isMember("max_lval_size")) { @@ -991,7 +998,7 @@ int main (int argc, char **argv) { if (!project["max_lval_size"].isUInt()) { throw std::runtime_error("Could not parse max_lval_size"); } - max_lval = project["max_lval_size"].asUInt(); + max_lval = std::strtoul(project["max_lval_size"].asString().c_str(), 0, 0); printf("max lval size = %d\n", 
max_lval); /* Unsupported for now (why?) @@ -1008,9 +1015,9 @@ int main (int argc, char **argv) { if (curtail == 0) { // Will be 0 unless specified on command line if (!project["curtail_fbi"].isUInt()) { curtail = 0; - }else{ + } else{ // null should never happen, if it does we'll violate an assert in the asUInt - curtail = project.get("curtail_fbi", Json::Value::null).asUInt(); + curtail = std::strtoul(project.get("curtail_fbi", Json::Value::null).asString().c_str(), 0, 0); } } printf("Curtail is %d\n", curtail); @@ -1018,24 +1025,59 @@ int main (int argc, char **argv) { inputfile = std::string(argv[4]); std::string db_name = project["db"].asString() + host.get("db_suffix", "").asString(); - std::string DBHost("database"); - int DBPort = 5432; - db.reset(new odb::pgsql::database("postgres", "postgrespostgres", + std::string DBHost = host.get("host", "database").asString(); + int DBPort = host.get("port", 5432).asInt(); + + const char* pgpass = std::getenv("PGPASS"); + const char* pguser = std::getenv("PGUSER"); + if (pgpass) { + // PGPASS environment variable is set, and pgpass points to its value. + std::cout << "PGPASS IS SET" << std::endl; + } else { + // PGPASS environment variable is not set. + std::cout << "PGPASS is not set" << std::endl; + exit(1); + } + + if (pguser) { + // PGUSER environment variable is set, and pgpass points to its value. + std::cout << "PGUSER IS SET: " << pguser << std::endl; + } else { + // PGUSER environment variable is not set. + std::cout << "PGUSER is not set" << std::endl; + exit(1); + } + + std::cout << "Name: " << db_name << std::endl; + std::cout << "Host: " << DBHost << std::endl; + std::cout << "Port: " << DBPort << std::endl; + + db.reset(new odb::pgsql::database(pguser, pgpass, db_name, DBHost, DBPort)); /* re-read pandalog, this time focusing on taint queries. 
Look for dead available data, attack points, and thus bug injection oppotunities */ - pandalog_open(plog.c_str(), "r"); + // pandalog_open(plog.c_str(), "r"); + // See line 949 + std::ifstream plog_json(plog.c_str()); + Json::Value plog_file; + plog_json >> plog_file; + uint64_t num_entries_read = 0; - while (1) { + + for (Json::Value& ple: plog_file) { // collect log entries that have same instr count (and pc). // these are to be considered together. - Panda__LogEntry *ple; - ple = pandalog_read_entry(); - if (ple == NULL) break; - num_entries_read++; + // Panda__LogEntry *ple; + //ple = pandalog_read_entry(); + //if (ple == NULL) { + // break; + //} + + num_entries_read++; + // std::cout << "*** Reading Entry " << num_entries_read << "\n"; if ((num_entries_read % 10000) == 0) { printf("processed %lu pandalog entries \n", num_entries_read); std::cout << num_bugs_added_to_db << " added to db " @@ -1044,26 +1086,26 @@ int main (int argc, char **argv) { << num_fake_duas << " fake duas\n"; } - if (ple->taint_query_pri) { + if (ple.isMember("taintQueryPri")) { taint_query_pri(ple); - } else if (ple->tainted_branch) { + } else if (ple.isMember("taintedBranch")) { update_liveness(ple); - } else if (ple->attack_point) { + } else if (ple.isMember("attackPoint")) { attack_point_lval_usage(ple); - } else if (ple->dwarf_call) { + } else if (ple.isMember("dwarfCall")) { record_call(ple); - } else if (ple->dwarf_ret) { + } else if (ple.isMember("dwarfRet")) { record_ret(ple); } - pandalog_free_entry(ple); + // pandalog_free_entry(ple); if (curtail > 0 && num_real_duas > curtail) { std::cout << "*** Curtailing output of fbi at " << num_real_duas << "\n"; break; } - } + } // for std::cout << num_bugs_added_to_db << " added to db "; - pandalog_close(); + // pandalog_close(); std::cout << num_potential_bugs << " potential bugs\n"; std::cout << num_potential_nonbugs << " potential non bugs\n"; diff --git a/tools/lavaDB/src/CMakeLists.txt b/tools/lavaDB/src/CMakeLists.txt index 
998ebcae..0b8d1320 100644 --- a/tools/lavaDB/src/CMakeLists.txt +++ b/tools/lavaDB/src/CMakeLists.txt @@ -3,5 +3,4 @@ #set_target_properties(lavaDB_x32 PROPERTIES COMPILE_FLAGS "-m32" LINK_FLAGS "-m32") add_library (lavaDB_x64 STATIC lavaDB.cpp) -set_property(TARGET lavaDB_x64 PROPERTY CXX_STANDARD 11) -target_compile_options(lavaDB_x64 PRIVATE -D_GLIBCXX_USE_CXX11_ABI=0) +set_property(TARGET lavaDB_x64 PROPERTY CXX_STANDARD 17) diff --git a/tools/lavaODB/include/spit.hxx b/tools/lavaODB/include/spit.hxx index 2f4340e9..4f9c4c55 100644 --- a/tools/lavaODB/include/spit.hxx +++ b/tools/lavaODB/include/spit.hxx @@ -7,34 +7,46 @@ extern std::vector ind2str; -static void spit_tquls(const Panda__TaintQueryUniqueLabelSet *tquls) { - printf("tquls=[ptr=0x%" PRIx64 ",n_label=%d,label=[", tquls->ptr, (int) tquls->n_label); - for (uint32_t i=0; in_label; i++) { - printf("%d", tquls->label[i]); - if (i+1n_label) printf(","); - } +static void spit_tquls(Json::Value& tquls) { + uint32_t n_label = tquls["label"].size(); + printf("tquls=[ptr=0x%" PRIx64 ",n_label=%u,label=[", std::strtoul(tquls["ptr"].asString().c_str(), 0, 0), n_label); + + int i = 0; + for (Json::Value& element : tquls["label"]) { + printf("%lu", std::strtoul(element.asString().c_str(), 0, 0)); + if (i + 1 < n_label) { + printf(","); + } + ++i; + } printf("]]"); } -static void spit_tq(Panda__TaintQuery *tq) { - printf("tq=[ptr=0x%" PRIx64 ",tcn=%d,offset=%d]", tq->ptr, tq->tcn, tq->offset); +static void spit_tq(const Json::Value& tq) { + printf("tq=[ptr=0x%" PRIx64 ",tcn=%lu,offset=%lu]", + std::strtoul(tq["ptr"].asString().c_str(), 0, 0), + std::strtoul(tq["tcn"].asString().c_str(), 0, 0), + std::strtoul(tq["offset"].asString().c_str(), 0, 0)); } -static void spit_si(Panda__SrcInfo *si) { - printf("si=[filename='%s',line=%d,", (char*) ind2str[si->filename].c_str(), si->linenum); - printf("astnodename='%s',", (char *) ind2str[si->astnodename].c_str()); - if (si->has_insertionpoint) { - 
printf("insertionpoint=%d", si->insertionpoint); +static void spit_si(const Json::Value& si) { + printf("si=[filename='%s',line=%lu,", si["filename"].asString().c_str(), std::strtoul(si["linenum"].asString().c_str(), 0, 0)); + printf("astnodename='%s',", si["astnodename"].asString().c_str()); + if (si.isMember("insertionpoint")) { + printf("insertionpoint=%lu", std::strtoul(si["insertionpoint"].asString().c_str(), 0, 0)); } printf("]"); } -static void spit_tqh(Panda__TaintQueryHypercall *tqh) { - printf("tqh=[buf=0x%" PRIx64 ",len=%d,num_tainted=%d]", tqh->buf, tqh->len, tqh->num_tainted); +static void spit_tqh(const Json::Value& tqh) { + printf("tqh=[buf=0x%" PRIx64 ",len=%lu,num_tainted=%lu]", + std::strtoul(tqh["buf"].asString().c_str(), 0, 0), + std::strtoul(tqh["len"].asString().c_str(), 0, 0), + std::strtoul(tqh["num_tainted"].asString().c_str(), 0, 0)); } -static void spit_ap(Panda__AttackPoint *ap) { - printf("ap=[info=%d]", ap->info); +static void spit_ap(const Json::Value& ap) { + printf("ap=[info=%lu]", std::strtoul(ap["info"].asString().c_str(), 0, 0)); } #endif diff --git a/tools/lavaODB/src/CMakeLists.txt b/tools/lavaODB/src/CMakeLists.txt index f09e3388..d4255712 100644 --- a/tools/lavaODB/src/CMakeLists.txt +++ b/tools/lavaODB/src/CMakeLists.txt @@ -38,8 +38,7 @@ add_custom_target (cleanup # ${CMAKE_CURRENT_SOURCE_DIR}/../include) add_library(lava-odb_x64 STATIC ${GENERATED}/lava-odb.cxx) -target_compile_options(lava-odb_x64 PRIVATE -D_GLIBCXX_USE_CXX11_ABI=0) -set_property(TARGET lava-odb_x64 PROPERTY CXX_STANDARD 11) +set_property(TARGET lava-odb_x64 PROPERTY CXX_STANDARD 17) target_link_libraries(lava-odb_x64 odb odb-pgsql) add_dependencies(lava-odb_x64 cleanup) target_include_directories(lava-odb_x64 BEFORE diff --git a/tools/lavaTool/CMakeLists.txt b/tools/lavaTool/CMakeLists.txt index febd4f0a..4507c51c 100644 --- a/tools/lavaTool/CMakeLists.txt +++ b/tools/lavaTool/CMakeLists.txt @@ -1 +1,5 @@ +cmake_minimum_required(VERSION 3.15) 
+project(lavaTool) + +# Add the src subdirectory add_subdirectory(src) diff --git a/tools/lavaTool/compile-on-docker.sh b/tools/lavaTool/compile-on-docker.sh deleted file mode 100755 index c8326f5c..00000000 --- a/tools/lavaTool/compile-on-docker.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash - -set -x - -lava="$(dirname "$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )")" -llvm_home="/llvm-3.6.2" - -docker run --rm -it \ - -e "HTTP_PROXY=$HTTP_PROXY" \ - -e "HTTPS_PROXY=$HTTPS_PROXY" \ - -e "http_proxy=$http_proxy" \ - -e "https_proxy=$https_proxy" \ - -e "LLVM_DIR=$llvm_home" \ - -v /var/run/postgresql:/var/run/postgresql \ - -v /etc/passwd:/etc/passwd:ro \ - -v /etc/group:/etc/group:ro \ - -v /etc/shadow:/etc/shadow:ro \ - -v /etc/gshadow:/etc/gshadow:ro \ - -v $HOME:$HOME \ - -v "$lava":"$lava" \ - lava32 sh -c "trap '' PIPE; su -l $(whoami) -c 'cmake -B$lava/build -H$lava -DCMAKE_INSTALL_PREFIX=$lava/install' && su -l $(whoami) -c 'make --no-print-directory -j$(nproc) -C \"$lava\"/build/lavaTool install'" - -#For debug builds- add -DCMAKE_BUILD_TYPE=RelWithDebInfo to the above command diff --git a/tools/lavaTool/get_c_files.py b/tools/lavaTool/get_c_files.py index c90f3994..203fccdd 100755 --- a/tools/lavaTool/get_c_files.py +++ b/tools/lavaTool/get_c_files.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 import json import os @@ -17,9 +17,9 @@ def processCompileCommands(srcPath): cFiles = [] modificationNeeded = False pathStr = os.path.join(srcPath, 'compile_commands.json') - jsonFile = open(pathStr, 'r') - compileCommands = json.load(jsonFile) - jsonFile.close() + with open(pathStr, 'r') as jsonFile: + compileCommands = json.load(jsonFile) + newCompileCommands = compileCommands[:] for i in compileCommands: if 'Werror' in i['command']: @@ -37,22 +37,20 @@ def processCompileCommands(srcPath): if modificationNeeded: shutil.copyfile(pathStr, os.path.join(srcPath, 'compile_commands_original.json')) - jsonFile = open(pathStr, 'w') - 
json.dump(newCompileCommands, jsonFile, indent=4) - jsonFile.close() + with open(pathStr, 'w') as jsonFile: + json.dump(newCompileCommands, jsonFile, indent=4) - jsonFile.close() return newCompileCommands def getCFiles(compileCommands): for d in compileCommands: - print os.path.join(d['directory'], d['file']) + print(os.path.join(d['directory'], d['file'])) def main(): if (len(sys.argv) < 2): - print 'Usage: ./get_c_files.py ' + print('Usage: ./get_c_files.py ') sys.exit(1) newCompileCommands = processCompileCommands(sys.argv[1]) getCFiles(newCompileCommands) diff --git a/tools/lavaTool/include/CallExprArgAdditionalHandler.h b/tools/lavaTool/include/CallExprArgAdditionalHandler.h index e1fe6fa5..06900fcc 100644 --- a/tools/lavaTool/include/CallExprArgAdditionalHandler.h +++ b/tools/lavaTool/include/CallExprArgAdditionalHandler.h @@ -6,10 +6,10 @@ struct CallExprArgAdditionHandler : public LavaMatchHandler { using LavaMatchHandler::LavaMatchHandler; // Inherit constructor. void CAddArg(const CallExpr *call) { - SourceLocation l1 = call->getLocStart(); - SourceLocation l2 = call->getLocEnd(); - debug(FNARG) << "call->getLocStart = " << Mod.sm->getFileOffset(l1) << "\n"; - debug(FNARG) << "call->getLocEnd = " << Mod.sm->getFileOffset(l2) << "\n"; + SourceLocation l1 = call->getBeginLoc(); + SourceLocation l2 = call->getEndLoc(); + debug(FNARG) << "call->getBeginLoc = " << Mod.sm->getFileOffset(l1) << "\n"; + debug(FNARG) << "call->getEndLoc = " << Mod.sm->getFileOffset(l2) << "\n"; bool inv=false; debug(FNARG) << "call : [" << getStringBetweenRange(*Mod.sm, call->getSourceRange(), &inv) << "]\n"; assert(!inv); @@ -21,14 +21,14 @@ struct CallExprArgAdditionHandler : public LavaMatchHandler { debug(FNARG) << "CallExprArgAdditionHandler\n"; bool inv; - SourceLocation l1 = call->getLocStart(); - SourceLocation l2 = call->getLocEnd(); + SourceLocation l1 = call->getBeginLoc(); + SourceLocation l2 = call->getEndLoc(); std::string cestr = getStringBetweenRange(*Mod.sm, 
call->getSourceRange(), &inv); assert (!inv); debug(FNARG) << "callexpr: [" << cestr << "\n"; SourceLocation loc = clang::Lexer::findLocationAfterToken( - call->getLocStart(), tok::l_paren, *Mod.sm, *Mod.LangOpts, true); + call->getBeginLoc(), tok::l_paren, *Mod.sm, *Mod.LangOpts, true); // No need to check for ArgDataflow, since matcher only called then auto fnname = get_containing_function_name(Result, *call); @@ -60,10 +60,10 @@ struct CallExprArgAdditionHandler : public LavaMatchHandler { if (func == nullptr || func->getLocation().isInvalid()) { // Function Pointer debug(FNARG) << "function pointer use\n"; - call->getLocStart().print(debug(FNARG), *Mod.sm); + call->getBeginLoc().print(debug(FNARG), *Mod.sm); debug(FNARG) << "\n"; //debug(FNARG) << " argcount=" << call->getNumArgs() << "\n"; - //loc = call->getArg(0)->getLocStart(); + //loc = call->getArg(0)->getBeginLoc(); } else if (Mod.sm->isInSystemHeader(func->getLocation())) { debug(FNARG) << "in system header\n"; return; diff --git a/tools/lavaTool/include/FieldDeclArgAdditionHandler.h b/tools/lavaTool/include/FieldDeclArgAdditionHandler.h index 6b788815..5b17ce34 100644 --- a/tools/lavaTool/include/FieldDeclArgAdditionHandler.h +++ b/tools/lavaTool/include/FieldDeclArgAdditionHandler.h @@ -18,21 +18,21 @@ struct FieldDeclArgAdditionHandler : public LavaMatchHandler { virtual void handle(const MatchFinder::MatchResult &Result) { const FieldDecl *fd = Result.Nodes.getNodeAs("fielddecl"); - SourceLocation l1 = fd->getLocStart(); - SourceLocation l2 = fd->getLocEnd(); + SourceLocation l1 = fd->getBeginLoc(); + SourceLocation l2 = fd->getEndLoc(); bool inv = false; debug(FNARG) << "fielddecl : [" << getStringBetweenRange(*Mod.sm, fd->getSourceRange(), &inv) << "]\n"; if (inv) { debug(FNARG) << "... 
is invalid\n"; return; } - const Type *ft = fd->getType().getTypePtr(); + const clang::Type *ft = fd->getType().getTypePtr(); if (ft->isFunctionPointerType()) { // field is a fn pointer - const Type *pt = ft->getPointeeType().IgnoreParens().getTypePtr(); + const clang::Type *pt = ft->getPointeeType().IgnoreParens().getTypePtr(); //assert(pt); if (!pt) return; - const FunctionType *fun_type = dyn_cast(pt); + const clang::FunctionType *fun_type = dyn_cast(pt); if (fun_type == NULL) { debug(FNARG) << "... clang could not determine function type, abort\n"; return; @@ -40,11 +40,11 @@ struct FieldDeclArgAdditionHandler : public LavaMatchHandler { //assert(fun_type); if (!fun_type) return; - const FunctionProtoType *prot = dyn_cast(fun_type); + const clang::FunctionProtoType *prot = dyn_cast(fun_type); if (!prot) return; // add the data_flow arg - SourceLocation l1 = fd->getLocStart(); - SourceLocation l2 = fd->getLocEnd(); + SourceLocation l1 = fd->getBeginLoc(); + SourceLocation l2 = fd->getEndLoc(); AddArgGen(Mod, l1, l2, false, prot->getNumParams(), 2); } } diff --git a/tools/lavaTool/include/FuncDeclArgAdditionHandler.h b/tools/lavaTool/include/FuncDeclArgAdditionHandler.h index ffa12910..c5b241cd 100644 --- a/tools/lavaTool/include/FuncDeclArgAdditionHandler.h +++ b/tools/lavaTool/include/FuncDeclArgAdditionHandler.h @@ -5,10 +5,10 @@ struct FuncDeclArgAdditionHandler : public LavaMatchHandler { using LavaMatchHandler::LavaMatchHandler; // Inherit constructor void AddArg(const FunctionDecl *func) { - SourceLocation l1 = func->getLocStart(); - SourceLocation l2 = func->getLocEnd(); - debug(FNARG) << "func->getLocStart = " << Mod.sm->getFileOffset(l1) << "\n"; - debug(FNARG) << "func->getLocEnd = " << Mod.sm->getFileOffset(l2) << "\n"; + SourceLocation l1 = func->getBeginLoc(); + SourceLocation l2 = func->getEndLoc(); + debug(FNARG) << "func->getBeginLoc = " << Mod.sm->getFileOffset(l1) << "\n"; + debug(FNARG) << "func->getEndLoc = " << Mod.sm->getFileOffset(l2) << 
"\n"; bool inv; debug(FNARG) << "func : [" << getStringBetweenRange(*Mod.sm, func->getSourceRange(), &inv) << "]\n"; @@ -102,7 +102,7 @@ struct FuncDeclArgAdditionHandler : public LavaMatchHandler { int data_slots_size = (data_slots.size() > 0) ? data_slots.size() : 1; data_array << "int data[" << data_slots_size << "] = {0};\n"; data_array << "int *" ARG_NAME << "= &data;\n"; - Mod.InsertAt(first->getLocStart(), data_array.str()); + Mod.InsertAt(first->getBeginLoc(), data_array.str()); } } else { const FunctionDecl *bodyDecl = nullptr; diff --git a/tools/lavaTool/include/FunctionArgHandler.h b/tools/lavaTool/include/FunctionArgHandler.h index 1055cd76..b1c0ad1f 100644 --- a/tools/lavaTool/include/FunctionArgHandler.h +++ b/tools/lavaTool/include/FunctionArgHandler.h @@ -20,8 +20,8 @@ struct FunctionArgHandler : public LavaMatchHandler { const SourceManager &sm = *Result.SourceManager; - auto sl1 = call->getLocStart(); - auto sl2 = call->getLocEnd(); + auto sl1 = call->getBeginLoc(); + auto sl2 = call->getEndLoc(); debug(FNARG) << "start: " << sl1.printToString(sm) << "\n"; debug(FNARG) << "end: " << sl2.printToString(sm) << "\n"; diff --git a/tools/lavaTool/include/FunctionPointerFieldHandler.h b/tools/lavaTool/include/FunctionPointerFieldHandler.h index b5fa49be..55e2c419 100644 --- a/tools/lavaTool/include/FunctionPointerFieldHandler.h +++ b/tools/lavaTool/include/FunctionPointerFieldHandler.h @@ -15,19 +15,19 @@ struct FunctionPointerFieldHandler : public LavaMatchHandler { } else { - const Type *t = fd->getType().getTypePtr(); + const clang::Type *t = fd->getType().getTypePtr(); if (t->isPointerType()) { // || t->isArrayType()) { - const Type *pt = t->getPointeeType().getTypePtr(); // t->getPointeeOrArrayElementType(); + const clang::Type *pt = t->getPointeeType().getTypePtr(); // t->getPointeeOrArrayElementType(); if (pt->isFunctionType()) debug(FNARG) << "Its a fn pointer!\n"; - auto sl1 = fd->getLocStart(); - auto sl2 = fd->getLocEnd(); + auto sl1 = 
fd->getBeginLoc(); + auto sl2 = fd->getEndLoc(); debug(FNARG) << "start: " << sl1.printToString(*Mod.sm) << "\n"; debug(FNARG) << "end: " << sl2.printToString(*Mod.sm) << "\n"; } - // debug(FNARG) << decl->getLocEnd().printToString(*Mod.sm) << "\n"; - // Mod.InsertAt(decl->getLocEnd().getLocWithOffset(-14), "int *" ARG_NAME ", "); + // debug(FNARG) << decl->getEndLoc().printToString(*Mod.sm) << "\n"; + // Mod.InsertAt(decl->getEndLoc().getLocWithOffset(-14), "int *" ARG_NAME ", "); } } }; diff --git a/tools/lavaTool/include/FunctionPointerTypedefHandler.h b/tools/lavaTool/include/FunctionPointerTypedefHandler.h index 2c8c31c8..9340c0cb 100644 --- a/tools/lavaTool/include/FunctionPointerTypedefHandler.h +++ b/tools/lavaTool/include/FunctionPointerTypedefHandler.h @@ -6,26 +6,26 @@ struct FunctionPointerTypedefHandler : public LavaMatchHandler { virtual void handle(const MatchFinder::MatchResult &Result) { const TypedefDecl *td = Result.Nodes.getNodeAs("typedefdecl"); - SourceLocation l1 = td->getLocStart(); - SourceLocation l2 = td->getLocEnd(); + SourceLocation l1 = td->getBeginLoc(); + SourceLocation l2 = td->getEndLoc(); bool inv=false; debug(FNARG) << "typedefdecl : [" << getStringBetweenRange(*Mod.sm, td->getSourceRange(), &inv) << "\n"; if (inv) { debug(FNARG) << "... 
is invalid\n"; return; } - const Type *ft = td->getUnderlyingType().getTypePtr(); + const clang::Type *ft = td->getUnderlyingType().getTypePtr(); //assert(ft); if (!ft) return; if (ft->isFunctionPointerType()) { // field is a fn pointer - const Type *pt = ft->getPointeeType().IgnoreParens().getTypePtr(); + const clang::Type *pt = ft->getPointeeType().IgnoreParens().getTypePtr(); //assert(pt); if (!pt) return; - const FunctionType *fun_type = dyn_cast(pt); + const clang::FunctionType *fun_type = dyn_cast(pt); //assert(fun_type); if (!fun_type) return; - const FunctionProtoType *prot = dyn_cast(fun_type); + const clang::FunctionProtoType *prot = dyn_cast(fun_type); // add the data_flow arg //assert(prot); if (!prot) return; diff --git a/tools/lavaTool/include/LavaMatchHandler.h b/tools/lavaTool/include/LavaMatchHandler.h index 1512d6c2..83c55ab4 100644 --- a/tools/lavaTool/include/LavaMatchHandler.h +++ b/tools/lavaTool/include/LavaMatchHandler.h @@ -150,8 +150,8 @@ struct LavaMatchHandler : public MatchFinder::MatchCallback { LavaASTLoc GetASTLoc(const SourceManager &sm, const Stmt *s) { assert(!SourceDir.empty()); - FullSourceLoc fullLocStart(sm.getExpansionLoc(s->getLocStart()), sm); - FullSourceLoc fullLocEnd(sm.getExpansionLoc(s->getLocEnd()), sm); + FullSourceLoc fullLocStart(sm.getExpansionLoc(s->getBeginLoc()), sm); + FullSourceLoc fullLocEnd(sm.getExpansionLoc(s->getEndLoc()), sm); std::string src_filename = StripPrefix( getAbsolutePath(sm.getFilename(fullLocStart)), SourceDir); return LavaASTLoc(src_filename, fullLocStart, fullLocEnd); @@ -268,11 +268,11 @@ struct LavaMatchHandler : public MatchFinder::MatchCallback { for (auto &keyValue : nodesMap) { const Stmt *stmt = keyValue.second.get(); if (stmt) { - SourceLocation start = stmt->getLocStart(); + SourceLocation start = stmt->getBeginLoc(); if (!sm.getFilename(start).empty() && sm.isInMainFile(start) && !sm.isMacroArgExpansion(start)) { debug(MATCHER) << keyValue.first << ": " << ExprStr(stmt) << " "; - 
stmt->getLocStart().print(debug(MATCHER), sm); + stmt->getBeginLoc().print(debug(MATCHER), sm); debug(MATCHER) << "\n"; if (DEBUG_FLAGS & MATCHER) stmt->dump(); } else return; diff --git a/tools/lavaTool/include/MatchFinder.h b/tools/lavaTool/include/MatchFinder.h index f111dff7..f0e436cd 100644 --- a/tools/lavaTool/include/MatchFinder.h +++ b/tools/lavaTool/include/MatchFinder.h @@ -159,10 +159,12 @@ class LavaMatchFinder : public MatchFinder, public SourceFileCallbacks { makeHandler() ); } - virtual bool handleBeginSource(CompilerInstance &CI, StringRef Filename) override { + + virtual bool handleBeginSource(CompilerInstance &CI) override { Insert.clear(); Mod.Reset(&CI.getLangOpts(), &CI.getSourceManager()); TUReplace.Replacements.clear(); + std::string Filename = CI.getSourceManager().getFileEntryForID(CI.getSourceManager().getMainFileID())->getName().str(); // Convert StringRef to std::string TUReplace.MainSourceFile = Filename; CurrentCI = &CI; @@ -232,7 +234,7 @@ class LavaMatchFinder : public MatchFinder, public SourceFileCallbacks { Insert.render(CurrentCI->getSourceManager(), TUReplace.Replacements); std::error_code EC; llvm::raw_fd_ostream YamlFile(TUReplace.MainSourceFile + ".yaml", - EC, llvm::sys::fs::F_RW); + EC, llvm::sys::fs::OF_None); yaml::Output Yaml(YamlFile); Yaml << TUReplace; } diff --git a/tools/lavaTool/include/Modifier.h b/tools/lavaTool/include/Modifier.h index d63304b5..21c68091 100644 --- a/tools/lavaTool/include/Modifier.h +++ b/tools/lavaTool/include/Modifier.h @@ -29,10 +29,10 @@ class Modifier { sm = sm_; } - std::pair range() const { - auto startRange = sm->getExpansionRange(stmt->getLocStart()); - auto endRange = sm->getExpansionRange(stmt->getLocEnd()); - return std::make_pair(startRange.first, endRange.second); + std::pair range() const { + auto startRange = sm->getExpansionRange(stmt->getBeginLoc()); + auto endRange = sm->getExpansionRange(stmt->getEndLoc()); + return std::make_pair(startRange.getBegin(), endRange.getEnd()); } 
SourceLocation before() const { diff --git a/tools/lavaTool/include/VarDeclArgAdditionHandler.h b/tools/lavaTool/include/VarDeclArgAdditionHandler.h index 12ab16fb..33482e3c 100644 --- a/tools/lavaTool/include/VarDeclArgAdditionHandler.h +++ b/tools/lavaTool/include/VarDeclArgAdditionHandler.h @@ -8,21 +8,21 @@ struct VarDeclArgAdditionHandler : public LavaMatchHandler { virtual void handle(const MatchFinder::MatchResult &Result) { const VarDecl *vd = Result.Nodes.getNodeAs("vardecl"); - SourceLocation l1 = vd->getLocStart(); - SourceLocation l2 = vd->getLocEnd(); + SourceLocation l1 = vd->getBeginLoc(); + SourceLocation l2 = vd->getEndLoc(); bool inv = false; debug(FNARG) << "vardecl : [" << getStringBetweenRange(*Mod.sm, vd->getSourceRange(), &inv) << "]\n"; if (inv) { debug(FNARG) << "... is invalid\n"; return; } - const Type *ft = vd->getType().getTypePtr(); + const clang::Type *ft = vd->getType().getTypePtr(); assert (ft); if (ft->isFunctionPointerType()) { // field is a fn pointer - const Type *pt = ft->getPointeeType().IgnoreParens().getTypePtr(); + const clang::Type *pt = ft->getPointeeType().IgnoreParens().getTypePtr(); assert(pt); - const FunctionType *fun_type = dyn_cast(pt); + const clang::FunctionType *fun_type = dyn_cast(pt); //assert(fun_type); if (!fun_type) return; const FunctionProtoType *prot = dyn_cast(fun_type); diff --git a/tools/lavaTool/include/lavaTool.h b/tools/lavaTool/include/lavaTool.h index 18aba668..811cc893 100644 --- a/tools/lavaTool/include/lavaTool.h +++ b/tools/lavaTool/include/lavaTool.h @@ -69,7 +69,7 @@ using clang::tooling::CommonOptionsParser; #define MAX_STRNLEN 64 -static llvm::raw_null_ostream null_ostream; +static llvm::raw_ostream &null_ostream = llvm::nulls(); #define debug(flag) ((DEBUG_FLAGS & (flag)) ? 
llvm::errs() : null_ostream) enum action { LavaQueries, LavaInjectBugs, LavaInstrumentMain }; @@ -136,8 +136,7 @@ static cl::extrahelp MoreHelp( static cl::opt LavaAction("action", cl::desc("LAVA Action"), cl::values( clEnumValN(LavaQueries, "query", "Add taint queries"), - clEnumValN(LavaInjectBugs, "inject", "Inject bugs"), - clEnumValEnd), + clEnumValN(LavaInjectBugs, "inject", "Inject bugs")), cl::cat(LavaCategory), cl::Required); static cl::opt LavaBugList("bug-list", @@ -204,7 +203,7 @@ namespace { } void my_terminate(void) { - static bool tried_throw = false; + static int tried_throw = false; std::cerr << "TEST\n"; @@ -284,7 +283,7 @@ std::string StripPrefix(std::string filename, std::string prefix) { return filename.substr(prefix_len); } -bool QueriableType(const Type *lval_type) { +bool QueriableType(const clang::Type *lval_type) { if ((lval_type->isIncompleteType()) || (lval_type->isIncompleteArrayType()) || (lval_type->isVoidType()) @@ -293,23 +292,24 @@ bool QueriableType(const Type *lval_type) { return false; } if (lval_type->isPointerType()) { - const Type *pt = lval_type->getPointeeType().getTypePtr(); + const clang::Type *pt = lval_type->getPointeeType().getTypePtr(); return QueriableType(pt); } return true; } + bool IsArgAttackable(const Expr *arg) { debug(MATCHER) << "IsArgAttackable \n"; if (DEBUG_FLAGS & MATCHER) arg->dump(); - const Type *t = arg->IgnoreParenImpCasts()->getType().getTypePtr(); + const clang::Type *t = arg->IgnoreParenImpCasts()->getType().getTypePtr(); if (dyn_cast(arg) || t->isStructureType() || t->isEnumeralType() || t->isIncompleteType()) { return false; } if (QueriableType(t)) { if (t->isPointerType()) { - const Type *pt = t->getPointeeType().getTypePtr(); + const clang::Type *pt = t->getPointeeType().getTypePtr(); // its a pointer to a non-void if ( ! 
(pt->isVoidType() ) ) { return true; @@ -388,23 +388,23 @@ LExpr threeDuaTest(Bug *bug, LvalBytes x, LvalBytes y) { auto oldmagic = bug->magic; - printf("Bug %llu solutions\n", bug->id); + printf("Bug %lu solutions\n", bug->id); const int NUM_BUGTYPES=3; // Todo remove the pring switch or print to a debug output switch (oldmagic % NUM_BUGTYPES) { case 0: bug->magic = (a_sol + b_sol) * c_sol; - printf("SOL 0x%llx == (0x%x + 0x%x) * 0x%x\n", bug->id, a_sol, b_sol, c_sol); + printf("SOL 0x%lx == (0x%x + 0x%x) * 0x%x\n", bug->id, a_sol, b_sol, c_sol); break; case 1: bug->magic = (a_sol * b_sol) - c_sol; - printf("SOL 0x%llx id == (0x%x * 0x%x) - 0x%x\n", bug->id, a_sol, b_sol, c_sol); + printf("SOL 0x%lx id == (0x%x * 0x%x) - 0x%x\n", bug->id, a_sol, b_sol, c_sol); break; case 2: bug->magic = (a_sol+2) * (b_sol+1) * (c_sol+3); - printf("SOL 0x%llx id == (0x%x+2) *( 0x%x+1) * (0x%x+3) \n", bug->id, a_sol, b_sol, c_sol); + printf("SOL 0x%lx id == (0x%x+2) *( 0x%x+1) * (0x%x+3) \n", bug->id, a_sol, b_sol, c_sol); break; } @@ -435,9 +435,11 @@ LExpr twoDuaTest(const Bug *bug, LvalBytes x) { return (Get(bug->trigger)^Get(x)) == LHex(bug->magic); } -static void printVersion() { - errs() << "LavaTool Version -- " << LAVA_VER << "\n"; +static void printVersion(llvm::raw_ostream &OS) { + OS << "LavaFnTool Version -- " << LAVA_VER << "\n"; } + + // returns true iff this fn name is in whitelist to be instrumented bool fninstr(std::pair fnname) { std::string filename = fnname.first; diff --git a/tools/lavaTool/src/CMakeLists.txt b/tools/lavaTool/src/CMakeLists.txt index 59816406..8c4d3b3c 100644 --- a/tools/lavaTool/src/CMakeLists.txt +++ b/tools/lavaTool/src/CMakeLists.txt @@ -1,8 +1,31 @@ -project (lavaTool VERSION 2.0.0 LANGUAGES CXX) -# LLVM variables to compile our clang tool -set (LLVM_LINK_COMPONENTS support) +cmake_minimum_required(VERSION 3.15) +project(lavaTool VERSION 2.0.0 LANGUAGES CXX) -set (CLANG_LIBS +# Set C++ Standard +set(CMAKE_CXX_STANDARD 17) 
+set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_EXTENSIONS OFF) + +# Find LLVM and Clang +if(DEFINED ENV{LLVM_DIR}) + list(APPEND CMAKE_PREFIX_PATH "$ENV{LLVM_DIR}/lib/cmake/llvm") + list(APPEND CMAKE_PREFIX_PATH "$ENV{LLVM_DIR}/lib/cmake/clang") +endif() + +find_package(LLVM REQUIRED CONFIG) +find_package(Clang REQUIRED CONFIG) + +# Print the include directories for debugging +message(STATUS "LLVM include dirs: ${LLVM_INCLUDE_DIRS}") +message(STATUS "Clang include dirs: ${CLANG_INCLUDE_DIRS}") + +# Add LLVM and Clang include directories +include_directories(SYSTEM ${LLVM_INCLUDE_DIRS} ${CLANG_INCLUDE_DIRS}) +include_directories(/usr/include/jsoncpp) +link_directories(${LLVM_LIBRARY_DIRS}) + +# Define LLVM and Clang libraries +set(LLVM_CLANG_LINK_LIBRARIES clangAST clangASTMatchers clangBasic @@ -10,115 +33,85 @@ set (CLANG_LIBS clangLex clangToolingCore clangTooling - ) - -set (LLVM_LIBS - LLVMSupport ) -if(DEFINED ENV{LLVM_DIR}) - find_package(LLVM CONFIG) -endif() +include_directories(${PROJECT_SOURCE_DIR}/../include) +include_directories(${CMAKE_SOURCE_DIR}/include) -set (LLVM_CLANG_LINK_LIBRARIES - ${CLANG_LIBS} - ${LLVM_LIBS} -) +message(STATUS "PROJECT_SOURCE_DIR: ${PROJECT_SOURCE_DIR}") +message(STATUS "CMAKE_SOURCE_DIR: ${CMAKE_SOURCE_DIR}") +message(STATUS "LLVM_CLANG_LINK_LIBRARIES: ${LLVM_CLANG_LINK_LIBRARIES}") -message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") -message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}") - -if(LLVM_BUILD_MAIN_SRC_DIR) - include_directories(${LLVM_BUILD_MAIN_SRC_DIR}/tools/clang/include) - include_directories(${LLVM_BUILD_BINARY_DIR}/tools/clang/include) +if(NOT EXISTS "${CMAKE_SOURCE_DIR}/include/lava_version.h") + message(FATAL_ERROR "Missing lava_version.h") endif() -link_directories(${LLVM_LIBRARY_DIRS}) -add_definitions(${LLVM_DEFINITIONS}) - -include_directories(${LLVM_INCLUDE_DIRS}) - -# omg target compiled as static library -add_library (omg STATIC omg.cpp) -set_property(TARGET omg PROPERTY CXX_STANDARD 11) 
-target_compile_options(omg PRIVATE -D_GLIBCXX_USE_CXX11_ABI=0) -#set_target_properties(omg PROPERTIES COMPILE_FLAGS "-m32" LINK_FLAGS "-m32 -flto -fuse-ld=gold") -set_target_properties(omg PROPERTIES LINK_FLAGS "-flto -fuse-ld=gold") -target_include_directories(omg BEFORE - PUBLIC - ${CMAKE_CURRENT_SOURCE_DIR}/../include - ) - +# Create OMG static library +add_library(omg STATIC ../src/omg.cpp) +target_compile_features(omg PRIVATE cxx_std_17) +target_include_directories(omg PRIVATE ${LLVM_INCLUDE_DIRS} ${CLANG_INCLUDE_DIRS}) +target_link_libraries(omg PRIVATE ${LLVM_CLANG_LINK_LIBRARIES}) -# lavaTool target compiled against llvm, omg, odb odb-pgsql and lava odb +# Create lavaTool executable add_executable(lavaTool lavaTool.cpp) -target_compile_options(lavaTool PRIVATE -D_GLIBCXX_USE_CXX11_ABI=0 -fno-omit-frame-pointer -O3 -fexceptions -frtti) -#set_target_properties(lavaTool PROPERTIES COMPILE_FLAGS "-m32" LINK_FLAGS "-m32 -flto -fuse-ld=gold") +target_compile_features(lavaTool PRIVATE cxx_std_17) +target_include_directories(lavaTool PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/../../lavaDB/include + ${CMAKE_CURRENT_SOURCE_DIR}/../../lavaODB/generated + ${CMAKE_CURRENT_SOURCE_DIR}/../../lavaODB/include + ${CMAKE_CURRENT_SOURCE_DIR}/../../include + ${CMAKE_CURRENT_SOURCE_DIR}/../include + ${LLVM_INCLUDE_DIRS} + ${CLANG_INCLUDE_DIRS} +) +target_link_libraries(lavaTool PRIVATE lavaDB_x64 omg odb odb-pgsql lava-odb_x64 ${LLVM_CLANG_LINK_LIBRARIES}) set_target_properties(lavaTool PROPERTIES LINK_FLAGS "-flto -fuse-ld=gold") -set_property(TARGET lavaTool PROPERTY CXX_STANDARD 14) -target_include_directories(lavaTool BEFORE - PUBLIC - ${CMAKE_CURRENT_SOURCE_DIR}/../../lavaDB/include - ${CMAKE_CURRENT_SOURCE_DIR}/../../lavaODB/generated - ${CMAKE_CURRENT_SOURCE_DIR}/../../lavaODB/include - ${CMAKE_CURRENT_SOURCE_DIR}/../../include - ${CMAKE_CURRENT_SOURCE_DIR}/../include - ) - -# lavaFnTool target compiled against llvm, omg, odb odb-pgsql and lava odb + +# Create lavaFnTool 
executable add_executable(lavaFnTool lavaFnTool.cpp) -target_compile_options(lavaFnTool PRIVATE -D_GLIBCXX_USE_CXX11_ABI=0 -fno-omit-frame-pointer -O3 -fexceptions -frtti) -#set_target_properties(lavaFnTool PROPERTIES COMPILE_FLAGS "-m32" LINK_FLAGS "-m32 -flto -fuse-ld=gold") +target_compile_features(lavaFnTool PRIVATE cxx_std_17) +target_include_directories(lavaFnTool PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/../../lavaDB/include + ${CMAKE_CURRENT_SOURCE_DIR}/../../lavaODB/generated + ${CMAKE_CURRENT_SOURCE_DIR}/../../lavaODB/include + ${CMAKE_CURRENT_SOURCE_DIR}/../../include + ${CMAKE_CURRENT_SOURCE_DIR}/../include + ${LLVM_INCLUDE_DIRS} + ${CLANG_INCLUDE_DIRS} +) +target_link_libraries(lavaFnTool PRIVATE lavaDB_x64 omg odb odb-pgsql lava-odb_x64 ${LLVM_CLANG_LINK_LIBRARIES}) set_target_properties(lavaFnTool PROPERTIES LINK_FLAGS "-flto -fuse-ld=gold") -set_property(TARGET lavaFnTool PROPERTY CXX_STANDARD 14) -target_include_directories(lavaFnTool BEFORE - PUBLIC - ${CMAKE_CURRENT_SOURCE_DIR}/../../lavaDB/include - ${CMAKE_CURRENT_SOURCE_DIR}/../../lavaODB/generated - ${CMAKE_CURRENT_SOURCE_DIR}/../../lavaODB/include - ${CMAKE_CURRENT_SOURCE_DIR}/../../include - ${CMAKE_CURRENT_SOURCE_DIR}/../include - ) +# Create lavaInitTool executable +add_executable(lavaInitTool lavaInitTool.cpp) +target_compile_features(lavaInitTool PRIVATE cxx_std_17) +target_include_directories(lavaInitTool PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/../../lavaODB/include + ${CMAKE_CURRENT_SOURCE_DIR}/../../include + ${CMAKE_CURRENT_SOURCE_DIR}/../include + ${LLVM_INCLUDE_DIRS} + ${CLANG_INCLUDE_DIRS} +) +target_link_libraries(lavaInitTool PRIVATE ${LLVM_CLANG_LINK_LIBRARIES}) +set_target_properties(lavaInitTool PROPERTIES LINK_FLAGS "-flto -fuse-ld=gold") + +# Add dependencies add_dependencies(lavaTool lavaFnTool) -# lavaInitTool target compiled against llvm -# TODO: this shouldn't depend on ODB but it needs it for lava.hxx? 
-add_executable(lavaInitTool lavaInitTool.cpp) -target_compile_options(lavaInitTool PRIVATE -D_GLIBCXX_USE_CXX11_ABI=0 -fno-omit-frame-pointer -O3 -fexceptions -frtti) -#set_target_properties(lavaInitTool PROPERTIES COMPILE_FLAGS "-m32" LINK_FLAGS "-m32 -flto -fuse-ld=gold") -set_target_properties(lavaInitTool PROPERTIES LINK_FLAGS "-flto -fuse-ld=gold") -set_property(TARGET lavaInitTool PROPERTY CXX_STANDARD 14) -target_include_directories(lavaInitTool BEFORE - PUBLIC - ${CMAKE_CURRENT_SOURCE_DIR}/../../lavaODB/include - ${CMAKE_CURRENT_SOURCE_DIR}/../../include - ${CMAKE_CURRENT_SOURCE_DIR}/../include - ) - -#target_link_libraries(lavaTool lavaDB_x32 omg odb odb-pgsql lava-odb_x32 ${LLVM_CLANG_LINK_LIBRARIES}) -#target_link_libraries(lavaFnTool lavaDB_x32 omg odb odb-pgsql lava-odb_x32 ${LLVM_CLANG_LINK_LIBRARIES}) -target_link_libraries(lavaTool lavaDB_x64 omg odb odb-pgsql lava-odb_x64 ${LLVM_CLANG_LINK_LIBRARIES}) -target_link_libraries(lavaFnTool lavaDB_x64 omg odb odb-pgsql lava-odb_x64 ${LLVM_CLANG_LINK_LIBRARIES}) -target_link_libraries(lavaInitTool ${LLVM_CLANG_LINK_LIBRARIES}) - -install (TARGETS lavaTool - RUNTIME DESTINATION bin - LIBRARY DESTINATION lib - ARCHIVE DESTINATION lib/static - OPTIONAL - ) - -install (TARGETS lavaFnTool - RUNTIME DESTINATION bin - LIBRARY DESTINATION lib - ARCHIVE DESTINATION lib/static - OPTIONAL - ) - -install (TARGETS lavaInitTool - RUNTIME DESTINATION bin - LIBRARY DESTINATION lib - ARCHIVE DESTINATION lib/static - OPTIONAL - ) +# Install targets +install(TARGETS lavaTool + RUNTIME DESTINATION bin + LIBRARY DESTINATION lib + ARCHIVE DESTINATION lib/static +) +install(TARGETS lavaFnTool + RUNTIME DESTINATION bin + LIBRARY DESTINATION lib + ARCHIVE DESTINATION lib/static +) +install(TARGETS lavaInitTool + RUNTIME DESTINATION bin + LIBRARY DESTINATION lib + ARCHIVE DESTINATION lib/static +) + diff --git a/tools/lavaTool/src/lavaFnTool.cpp b/tools/lavaTool/src/lavaFnTool.cpp index 0d7992c8..eed2858f 100644 --- 
a/tools/lavaTool/src/lavaFnTool.cpp +++ b/tools/lavaTool/src/lavaFnTool.cpp @@ -18,25 +18,22 @@ #define DEBUG_FLAGS 0 // ( LOG ) using namespace clang::tooling; -using namespace llvm;using namespace clang; +using namespace llvm; using namespace clang; using namespace clang::ast_matchers; using namespace std; - -static llvm::raw_null_ostream null_ostream; +static llvm::raw_ostream &null_ostream = llvm::nulls(); #define debug(flag) ((DEBUG_FLAGS & (flag)) ? llvm::errs() : null_ostream) - - static cl::OptionCategory LavaFnCategory("LAVA Function diagnosis"); static cl::extrahelp CommonHelp(CommonOptionsParser::HelpMessage); static cl::extrahelp MoreHelp( "\nIdentify all fn defs, prototypes, calls for later use by LAVA.\n"); -static void printVersion() { - errs() << "LavaFnTool Version -- " << LAVA_VER << "\n"; +void printVersion(llvm::raw_ostream &OS) { + OS << "LavaFnTool Version -- " << LAVA_VER << "\n"; } ofstream outfile; @@ -56,7 +53,7 @@ void spit_fun_decl(const FunctionDecl *fundecl) { outfile << " params: \n"; for (auto p : fundecl->parameters()) { QualType ot = p->getOriginalType(); - const Type *otp = ot.getTypePtr(); + const clang::Type *otp = ot.getTypePtr(); if (otp->isFunctionType() || otp->isFunctionPointerType()) { spit_type(" - param: fnptr ", ot); } @@ -68,10 +65,14 @@ void spit_fun_decl(const FunctionDecl *fundecl) { void spit_source_locs(const char *spaces, const Expr *expr, const SourceManager &sm) { - auto sl1 = expr->getLocStart(); - auto sl2 = expr->getLocEnd(); - outfile << (string(spaces) + "start: ") << sl1.printToString(sm) << "\n"; - outfile << (string(spaces) + "end: ") << sl2.printToString(sm) << "\n"; + clang::SourceLocation sl1 = expr->getBeginLoc(); + clang::SourceLocation sl2 = expr->getEndLoc(); + if (sl1.isValid()) { + outfile << (string(spaces) + "start: ") << sl1.printToString(sm) << "\n"; + } + if (sl2.isValid()) { + outfile << (string(spaces) + "end: ") << sl2.printToString(sm) << "\n"; + } } @@ -155,14 +156,14 @@ class 
CallPrinter : public MatchFinder::MatchCallback { std::string fun_name = get_containing_function_name(Result, *call); outfile << " containing_function: " << fun_name << "\n"; - QualType rt = call->getCallReturnType();//(Result.Context); + QualType rt = call->getCallReturnType(*Result.Context); spit_type( " ret_type: ", rt); outfile << " args: \n"; for (auto it = call->arg_begin(); it != call->arg_end(); ++it) { const Expr *arg = dyn_cast(*it); arg = arg->IgnoreImpCasts(); QualType at = arg->IgnoreImpCasts()->getType(); - const Type *atp = at.getTypePtr(); + const clang::Type *atp = at.getTypePtr(); string expstr, type, info; outfile << " - arg: \n"; if (atp->isFunctionType()) { @@ -201,7 +202,7 @@ class FnPtrAssignmentPrinter : public MatchFinder::MatchCallback { virtual void run(const MatchFinder::MatchResult &Result) { const BinaryOperator *bo = Result.Nodes.getNodeAs("bo"); Expr *rhs = bo->getRHS()->IgnoreImpCasts(); - const Type *rhst = rhs->getType().getTypePtr(); + const clang::Type *rhst = rhs->getType().getTypePtr(); if (rhst->isFunctionType()) { outfile << "- fnPtrAssign: \n"; spit_source_locs(" ", bo, *Result.SourceManager); @@ -219,16 +220,20 @@ class VarDeclPrinter : public MatchFinder::MatchCallback { public : virtual void run(const MatchFinder::MatchResult &Result) { const VarDecl *vd = Result.Nodes.getNodeAs("vd"); - const Type *et = vd->getType().getTypePtr(); + const clang::Type *et = vd->getType().getTypePtr(); if (vd->hasInit() && et->isPointerType()) { const Expr *init = vd->getInit()->IgnoreImpCasts(); - const Type *it = init->getType().getTypePtr(); + const clang::Type *it = init->getType().getTypePtr(); if (it->isFunctionType()) { outfile << "- fnPtrAssign:\n"; - auto sl1 = vd->getLocStart(); - auto sl2 = vd->getLocEnd(); - outfile << " start: " << sl1.printToString(*Result.SourceManager) << "\n"; - outfile << " end: " << sl2.printToString(*Result.SourceManager) << "\n"; + clang::SourceLocation sl1 = vd->getBeginLoc(); + clang::SourceLocation 
sl2 = vd->getEndLoc(); + if (sl1.isValid()) { + outfile << " start: " << sl1.printToString(*Result.SourceManager) << "\n"; + } + if (sl2.isValid()) { + outfile << " end: " << sl2.printToString(*Result.SourceManager) << "\n"; + } const DeclRefExpr *dre = llvm::dyn_cast(init); outfile << " name: " << dre->getNameInfo().getAsString() << "\n"; const FunctionDecl *fndecl = llvm::dyn_cast(dre->getDecl()); @@ -248,10 +253,15 @@ class FunctionPrinter : public MatchFinder::MatchCallback { // if (func->isExternC()) return; if (func) { outfile << "- fun: \n"; - auto sl1 = func->getLocStart(); - auto sl2 = func->getLocEnd(); - outfile << " start: " << sl1.printToString(*Result.SourceManager) << "\n"; - outfile << " end: " << sl2.printToString(*Result.SourceManager) << "\n"; + clang::SourceLocation sl1 = func->getBeginLoc(); + clang::SourceLocation sl2 = func->getEndLoc(); + if (sl1.isValid()) { + outfile << " start: " << sl1.printToString(*Result.SourceManager) << "\n"; + } + if (sl2.isValid()) { + outfile << " end: " << sl2.printToString(*Result.SourceManager) << "\n"; + } + outfile << " name: " << (func->getNameInfo().getAsString()) << "\n"; if (func->doesThisDeclarationHaveABody()) outfile << " hasbody: true\n"; diff --git a/tools/lavaTool/src/lavaInitTool.cpp b/tools/lavaTool/src/lavaInitTool.cpp index beed8d1c..4ec1b79c 100644 --- a/tools/lavaTool/src/lavaInitTool.cpp +++ b/tools/lavaTool/src/lavaInitTool.cpp @@ -1,4 +1,4 @@ -// Clang rewriter to initialize all unitialized variables +// Clang rewriter to initialize all uninitialized variables // to '={0}' (AKA null for any type) // It's a bit messy because it duplicates some classes @@ -31,7 +31,7 @@ #define LOG (1 << 0) #define INI (1 << 1) -#define DEBUG_FLAGS ( INI | LOG ) +#define DEBUG_FLAGS (INI | LOG) using namespace clang::tooling; using namespace llvm; @@ -40,7 +40,8 @@ using namespace clang::ast_matchers; using namespace std; static llvm::raw_null_ostream null_ostream; -#define debug(flag) ((DEBUG_FLAGS & 
(flag)) ? llvm::errs() : null_ostream) +#define debug(flag) ((DEBUG_FLAGS & (flag)) ? static_cast(llvm::errs()) : static_cast(null_ostream)) + static cl::OptionCategory LavaInitCategory("LAVA Init Tool"); static cl::extrahelp CommonHelp(CommonOptionsParser::HelpMessage); @@ -100,17 +101,16 @@ class Initializer : public MatchFinder, public SourceFileCallbacks { ); } - virtual bool handleBeginSource(CompilerInstance &CI, StringRef Filename) override { + virtual bool handleBeginSource(CompilerInstance &CI) override { Insert.clear(); Mod.Reset(&CI.getLangOpts(), &CI.getSourceManager()); TUReplace.Replacements.clear(); - TUReplace.MainSourceFile = Filename; + TUReplace.MainSourceFile = CI.getSourceManager().getFileEntryForID(CI.getSourceManager().getMainFileID())->getName().str(); // Convert StringRef to std::string CurrentCI = &CI; - debug(LOG) << "*** handleBeginSource for: " << Filename << "\n"; + debug(LOG) << "*** handleBeginSource for: " << TUReplace.MainSourceFile << "\n"; - for (auto it = MatchHandlers.begin(); - it != MatchHandlers.end(); it++) { + for (auto it = MatchHandlers.begin(); it != MatchHandlers.end(); it++) { (*it)->LangOpts = &CI.getLangOpts(); } return true; @@ -123,7 +123,7 @@ class Initializer : public MatchFinder, public SourceFileCallbacks { Insert.render(CurrentCI->getSourceManager(), TUReplace.Replacements); std::error_code EC; llvm::raw_fd_ostream YamlFile(TUReplace.MainSourceFile + ".yaml", - EC, llvm::sys::fs::F_RW); + EC, llvm::sys::fs::OF_None); yaml::Output Yaml(YamlFile); Yaml << TUReplace; } diff --git a/tools/lavaTool/src/lavaTool.cpp b/tools/lavaTool/src/lavaTool.cpp index 022a362a..ec0a2956 100644 --- a/tools/lavaTool/src/lavaTool.cpp +++ b/tools/lavaTool/src/lavaTool.cpp @@ -7,6 +7,7 @@ #include "lexpr.hxx" #include "lavaTool.h" #include "MatchFinder.h" +#include void parse_whitelist(std::string whitelist_filename) { debug(FNARG) << "parsing white list " << whitelist_filename << "\n"; @@ -66,7 +67,27 @@ int main(int argc, const 
char **argv) { errs() << "Error: Specify a database name with \"--db [name]\". Exiting . . .\n"; exit(1); } - db.reset(new odb::pgsql::database("postgres", "postgrespostgres", + const char* pgpass = std::getenv("PGPASS"); + const char* pguser = std::getenv("PGUSER"); + if (pgpass) { + // PGPASS environment variable is set, and pgpass points to its value. + std::cout << "PGPASS IS SET" << std::endl; + } else { + // PGPASS environment variable is not set. + std::cout << "PGPASS is not set" << std::endl; + exit(1); + } + + if (pguser) { + // PGUSER environment variable is set, and pgpass points to its value. + std::cout << "PGUSER IS SET: " << pguser << std::endl; + } else { + // PGUSER environment variable is not set. + std::cout << "PGUSER is not set" << std::endl; + exit(1); + } + + db.reset(new odb::pgsql::database(pguser, pgpass, DBName, DBHost, DBPort)); t = new odb::transaction(db->begin()); diff --git a/tools/lavaTool/src/omg.cpp b/tools/lavaTool/src/omg.cpp index d59add6c..19383532 100644 --- a/tools/lavaTool/src/omg.cpp +++ b/tools/lavaTool/src/omg.cpp @@ -1,4 +1,3 @@ - #include #include #include @@ -7,27 +6,23 @@ using namespace std; - #include "clang/AST/AST.h" #include "clang/Driver/Options.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Lex/Lexer.h" - - using namespace clang; //using namespace clang::ast_matchers; using namespace clang::driver; using namespace llvm; - #define PARENS (1 << 0) #define GENERAL (1 << 1) #define DEBUG_FLAGS 0 // (PARENS | GENERAL) static llvm::raw_null_ostream null_ostream; -#define debug(flag) ((DEBUG_FLAGS & (flag)) ? llvm::errs() : null_ostream) +#define debug(flag) ((DEBUG_FLAGS & (flag)) ? 
static_cast(llvm::errs()) : static_cast(null_ostream)) #include "omg.h" @@ -100,13 +95,8 @@ static llvm::raw_null_ostream null_ostream; enum NextThing {NtInvalid=1, NtOpen=2, NtClose=3}; -// This tuple is -// position in string (unsigned) -// isOpenParen (bool) -// level (unsigned) -typedef std::tuple < size_t, bool, unsigned > ParenInfo ; - -typedef std::vector < ParenInfo > ParensInfo; +typedef std::tuple ParenInfo; +typedef std::vector ParensInfo; // figure out paren info for this string. ParensInfo getParens(std::string sourceString) { @@ -118,164 +108,101 @@ ParensInfo getParens(std::string sourceString) { size_t nextClose = sourceString.find(")", searchLoc); NextThing nt = NtInvalid; - if (nextOpen != std::string::npos - && nextClose != std::string::npos) { + if (nextOpen != std::string::npos && nextClose != std::string::npos) { debug(PARENS) << "Both in bounds\n"; - // both are in bounds so we can compare them - // the next one is whichever comes first if (nextOpen < nextClose) nt = NtOpen; else nt = NtClose; - } - else { - // one or neither is in bounds - // whever is in bounds is next one + } else { debug(PARENS) << "One/neither in bounds\n"; if (nextOpen != std::string::npos) nt = NtOpen; else if (nextClose != std::string::npos) nt = NtClose; } - ParenInfo pt; - // no valid next open or close -- exit loop + if (nt == NtInvalid) break; size_t nextLoc; debug(PARENS) << "NT is " << ((nt==NtOpen)?"(":")") << "\n"; + ParenInfo pt; switch (nt) { - case NtOpen: - // '(' is next thing - nextLoc = nextOpen; - level ++; - pt = std::make_tuple(nextLoc, true, level); - break; - case NtClose: - // ')' is next thing - nextLoc = nextClose; - pt = std::make_tuple(nextLoc, false, level); - level --; - break; - default: - assert (1==0); // should not happen + case NtOpen: + nextLoc = nextOpen; + level++; + pt = std::make_tuple(nextLoc, true, level); + break; + case NtClose: + nextLoc = nextClose; + pt = std::make_tuple(nextLoc, false, level); + level--; + break; + default: 
+ assert(false); // should not happen } - // collect the tuples parens.push_back(pt); - searchLoc = nextLoc+1; + searchLoc = nextLoc + 1; } debug(PARENS) << sourceString << "\n"; unsigned l = parens.size(); - if (l>0) { - //debug(PARENS) << "There are parens\n"; - - // Find adjacent open, open + close close at same levels, delete all elements - // in `parens` between 1st open and last close (inclusive) - // I'm not sorry for the goto - /* - for each element idx=x, level=1 open=T: - check if x+1 exists with open=T, level=2, if so: - Seek until open=F level=2 idx=y - if y+1 exists with open=F, level=1: - delete from x to y, inclusive - */ - -// This is a bad hack that will NOT WORK for all inputs and it never will. We need a better solution for data-flow -// injections into function arguments -// We often see lines with "__attribute__((foo)) fn_def(arg1, arg2)" or "fn_def(arg1, arg2) __attribute__((foo))" -// If the line we have contains "((" and "))" then ignore those when matching - std::string ws; - - if (sourceString.find("__attribute__") != std::string::npos) { + if (l > 0) { + std::string ws; + if (sourceString.find("__attribute__") != std::string::npos) { remove_attributes: for (auto p : parens) { ws = ""; - for (int i=0;i(p); i++) ws+=" "; + for (size_t i = 0; i < std::get<0>(p); i++) ws += " "; debug(PARENS) << ws << "| paren " << std::get<0>(p) - << " " << std::get<1>(p) - << " " << std::get<2>(p) << "\n"; + << " " << std::get<1>(p) + << " " << std::get<2>(p) << "\n"; } - // TODO something like __attribute__ ( (foo)) is probably valid too, need to ignore whitespace - for (auto oparen=parens.begin(); oparen != parens.end(); ++oparen) { - unsigned int o_idx = std::get<0>(*oparen); - bool o_open = std::get<1>(*oparen); + for (auto oparen = parens.begin(); oparen != parens.end(); ++oparen) { + unsigned int o_idx = std::get<0>(*oparen); + bool o_open = std::get<1>(*oparen); unsigned int o_level = std::get<2>(*oparen); - if (o_level != 1 || o_open!=true) 
continue; // We only want level 1 opens - if ((oparen+1) == parens.end()) continue; + if (o_level != 1 || !o_open) continue; + if ((oparen + 1) == parens.end()) continue; - // If there's another ( next, get it - auto oparen2 = oparen+1; - if (std::get<0>(*oparen2)==o_idx+1 && // Found adjacent open - std::get<1>(*oparen2)) { + auto oparen2 = oparen + 1; + if (std::get<0>(*oparen2) == o_idx + 1 && std::get<1>(*oparen2)) { debug(PARENS) << "\tFound set of adjacent open parens at " << o_idx << "\n"; - // Find next close paren that matches to inner open paren - for (auto cparen=oparen2; cparen != parens.end(); ++cparen) { - unsigned int c_idx = std::get<0>(*cparen); - bool c_open = std::get<1>(*cparen); + for (auto cparen = oparen2; cparen != parens.end(); ++cparen) { + unsigned int c_idx = std::get<0>(*cparen); + bool c_open = std::get<1>(*cparen); unsigned int c_level = std::get<2>(*cparen); if (!c_open && c_level == 2) { - // Found match debug(PARENS) << "\tFound first close paren at " << c_idx << "\n"; - if ((cparen+1) == parens.end()) continue; + if ((cparen + 1) == parens.end()) continue; - // If there's another ) next, get it - auto cparen2=cparen+1; - if (std::get<0>(*cparen2)==c_idx+1 && !std::get<1>(*cparen2)) { // idx is +1 from close idx, and is close + auto cparen2 = cparen + 1; + if (std::get<0>(*cparen2) == c_idx + 1 && !std::get<1>(*cparen2)) { debug(PARENS) << ("\tFOUND ((...)) pair, removing\n"); - // Delete all elements with idx between (o_idx, c_idx), inclusive - parens.erase(oparen, cparen2+1); // Include cparen2 in the delete + parens.erase(oparen, cparen2 + 1); goto remove_attributes; } - // We only want to examine the first matching close paren, break after - // XXX this may be a bad assumption that this list is ordered break; } } } } - } + } - ParenInfo &cp = parens[l-1]; + ParenInfo &cp = parens[l - 1]; ParenInfo &op = parens[0]; if (std::get<1>(op) == true && std::get<1>(cp) == false) { - // first is open and last is close if (std::get<2>(op) 
== 1 && std::get<2>(cp) == 1) { - // and both are level 1 -- good - } - else { - debug(PARENS) << "Clearing parens since levels of open/close arent both 1\n"; + // good + } else { + debug(PARENS) << "Clearing parens since levels of open/close aren't both 1\n"; parens.clear(); } - } - else { - debug(PARENS) << "Clearing parens since we dont have op/close as first/last\n"; + } else { + debug(PARENS) << "Clearing parens since we don't have op/close as first/last\n"; parens.clear(); } } return parens; } - -/* - This one is really our fault. We have the string-ified version of - the ast node that is an lval we want to siphon off as a dua. This - comes from libdwarf, by way of the pri magic in PANDA. This means - we can get something like - - ((*((**(pdtbl)).pub)).sent_table)) - - Before we siphon that dua off, we need to test the various ptrs that - will end up getting dereferenced to make sure they aren't null. So - we use getparens to find the balanced parens, and then consider each to - see if it starts wit '(*' or '(**' or .. And if so, we add checks to - ensure that ptrs are non-null - - So, for this example, we want - - if (pdtbl && *pdtbl && ((**(pdtbl)).pub)) {...} - - This, too, is reprehensible. But, gotta get things to work. Not - sorry. Right solution would be to have pri figure this out? 
- -*/ - std::string createNonNullTests(std::string sourceString) { ParensInfo parens = getParens(sourceString); debug(PARENS) << "nntest [" << sourceString << "]\n"; @@ -285,25 +212,23 @@ std::string createNonNullTests(std::string sourceString) { while (true) { unsigned i_open; bool found = false; - for (i_open=curr; i_open(parens[i_open])) - // found next open + for (i_open = curr; i_open < parens.size(); i_open++) { + if (std::get<1>(parens[i_open])) { found = true; break; + } } - if (!found) break; // end loop after parsing last pair of parens + if (!found) break; ParenInfo oinfo = parens[i_open]; size_t opos = std::get<0>(oinfo); unsigned olevel = std::get<2>(oinfo); unsigned i_close; found = false; - for (i_close=i_open+1; i_close(parens[i_close]); - size_t level = std::get<2>(parens[i_close]); + unsigned level = std::get<2>(parens[i_close]); if (!isopen && level == olevel) { - // found first close after that open - // which is at same level found = true; break; } @@ -311,17 +236,16 @@ std::string createNonNullTests(std::string sourceString) { if (!found) break; ParenInfo cinfo = parens[i_close]; size_t cpos = std::get<0>(cinfo); - std::string cand = sourceString.substr(opos, cpos-opos+1); - // (**(pdtbl)) - unsigned num_stars=1; - for (num_stars=1; num_stars 0) { debug(PARENS) << "cand = [" << cand << "]\n"; debug(PARENS) << "num_stars = " << num_stars << "\n"; - for (unsigned i=0; i o2)) return std::string("Invalid"); - return (std::string(buf, o2-o1+1)); + return (std::string(buf, o2 - o1 + 1)); } - - - - -// find location of str after loc -// sets *inv=true if something went wrong or we didnt find - SourceLocation getLocAfterStr(const SourceManager &sm, SourceLocation &loc, const char *str, unsigned str_len, unsigned max_search, bool *inv) { const char *buf = sm.getCharacterData(loc, inv); if (*inv) { - // getchardata failed return loc; } debug(PARENS) << "getCharacterData succeeded\n"; - // getCharacterData succeeded const char *p = strstr(buf, str); if 
(p == NULL) { - // didnt find the string *inv = true; return loc; } - // found the string. *inv = false; return loc.getLocWithOffset(p - buf); } -/* - const char *p = buf; - *inv = true; - while (true) { - if (0 == strncmp(p, str, str_len)) { - // found the str in the source - *inv = false; - break; - } - p++; - if (p-buf > max_search) - break; - } - if (!(*inv)) { - unsigned pos = p - buf; - //debug(FNARG) << "Found [" << str << "] @ " << pos << "\n"; - std::string uptomatch = std::string(buf, str_len + pos); - //debug(FNARG) << "uptomatch: [" << uptomatch << "]\n"; - return loc.getLocWithOffset(str_len + pos); - } - } - return loc; -} -*/ - -// comparison of source locations based on file offset -// XXX better to make sure l1 and l2 in same file? int srcLocCmp(const SourceManager &sm, SourceLocation &l1, SourceLocation &l2) { unsigned o1 = sm.getFileOffset(l1); unsigned o2 = sm.getFileOffset(l2); - if (o1o2) return SCMP_GREATER; + if (o1 < o2) return SCMP_LESS; + if (o1 > o2) return SCMP_GREATER; return SCMP_EQUAL; } +typedef std::tuple SLParenInfo; +typedef std::vector SLParensInfo; -typedef std::tuple < SourceLocation, bool, unsigned > SLParenInfo ; - -typedef std::vector < SLParenInfo > SLParensInfo; - -/* - returns a vector of paren info tuples in terms of SourceLocation instead of - position in a string -*/ - -SLParensInfo SLgetParens(const SourceManager &sm, SourceLocation &l1, - SourceLocation &l2) { - +SLParensInfo SLgetParens(const SourceManager &sm, SourceLocation &l1, SourceLocation &l2) { SLParensInfo slparens; bool inv; std::string sourceStr = getStringBetweenRange(sm, SourceRange(l1, l2), &inv); debug(GENERAL) << "SLgetParens sourceStr = [" << sourceStr << "]\n"; if (inv) { debug(GENERAL) << "Invalid\n"; - } - else { + } else { ParensInfo parens = getParens(sourceStr); for (auto paren : parens) { size_t pos = std::get<0>(paren); - unsigned isopen = std::get<1>(paren); + bool isopen = std::get<1>(paren); unsigned level = std::get<2>(paren); 
debug(GENERAL) << "Found paren pair open=" << isopen << ", level=" << level << "\n"; SourceLocation sl = l1.getLocWithOffset(pos); @@ -456,4 +330,4 @@ SLParensInfo SLgetParens(const SourceManager &sm, SourceLocation &l1, } } return slparens; -} +} \ No newline at end of file diff --git a/tools/lavaTool/test-compile-on-docker.sh b/tools/lavaTool/test-compile-on-docker.sh deleted file mode 100755 index 9da41d1b..00000000 --- a/tools/lavaTool/test-compile-on-docker.sh +++ /dev/null @@ -1,32 +0,0 @@ -#!/bin/bash - -set -x - -lava="$(dirname "$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )")" -llvm_home="/llvm-3.6.2" - -# Start host_fninstr.sh, a watchdog that runs fninstr.sh on the host as necessary -# Argument = number of targets to rebuild -NUM_TARGETS="3" -$lava/lavaTool/tests/host_fninstr.sh "$lava/lavaTool/tests" $NUM_TARGETS &> $lava/lavaTool/tests/log_hostfninstr.txt & -PID="$!" -echo "Started host_fninstr with pid=$PID" - -docker run --rm -it \ - -e "HTTP_PROXY=$HTTP_PROXY" \ - -e "HTTPS_PROXY=$HTTPS_PROXY" \ - -e "http_proxy=$http_proxy" \ - -e "https_proxy=$https_proxy" \ - -e "LLVM_DIR=$llvm_home" \ - -e "PATH=$PATH:/llvm-3.6.2/Release/bin" \ - -v /var/run/postgresql:/var/run/postgresql \ - -v /etc/passwd:/etc/passwd:ro \ - -v /etc/group:/etc/group:ro \ - -v /etc/shadow:/etc/shadow:ro \ - -v /etc/gshadow:/etc/gshadow:ro \ - -v $HOME:$HOME \ - -v "$lava":"$lava" \ - lava32 sh -c "trap '' PIPE; su -l $(whoami) -c 'cmake -B$lava/build -H$lava -DCMAKE_INSTALL_PREFIX=$lava/install' && su -l $(whoami) -c 'make -j$(nproc) -C \"$lava\"/build/lavaTool install && $lava/lavaTool/tests/run.sh \"$lava\"'" - #lava32 sh -c "trap '' PIPE; su -l $(whoami) -c 'make -j$(nproc) -C \"$lava\"/src_clang && $lava/src_clang/_tests/run.sh \"$lava\"'" - -kill $PID 2>/dev/null # Kill host_fninstr.sh diff --git a/tools/lavaTool/tests/run.sh b/tools/lavaTool/tests/run.sh index ab888d6d..8255e807 100755 --- a/tools/lavaTool/tests/run.sh +++ b/tools/lavaTool/tests/run.sh @@ -34,7 +34,7 
@@ runtest() { ../../../install/bin/lavaTool -debug -lava-wl ./$1.fnwl -arg_dataflow -src-prefix=`pwd` -action=inject $1.c &> lavaTool.log cp $1.c{,.bak} - /llvm-3.6.2/Release/bin/clang-apply-replacements . + /usr/lib/llvm-11/bin/clang-apply-replacements . make clean make &> cc.log