From da0ee182e5e9ee18ebaa0252b26d944640d5f8ba Mon Sep 17 00:00:00 2001 From: xuqingchai Date: Wed, 12 Jan 2022 23:56:35 +0800 Subject: [PATCH] finish hw04 --- CMakeLists.txt | 21 +++++++------ main.cpp | 80 +++++++++++++++++++++++++++++--------------------- 2 files changed, 59 insertions(+), 42 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 29b152c..1c641ca 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,9 +1,12 @@ -cmake_minimum_required(VERSION 3.12) -project(hellocmake LANGUAGES CXX) - -set(CMAKE_CXX_STANDARD 17) -if (NOT CMAKE_BUILD_TYPE) - set(CMAKE_BUILD_TYPE Release) -endif() - -add_executable(main main.cpp) +cmake_minimum_required(VERSION 3.12) +project(hellocmake LANGUAGES CXX) + +set(CMAKE_CXX_STANDARD 17) +if (NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE Release) +endif() + +add_executable(main main.cpp) +find_package(OpenMP REQUIRED) +target_link_libraries(main PUBLIC OpenMP::OpenMP_CXX) +target_compile_options(main PUBLIC -ffast-math -march=native) \ No newline at end of file diff --git a/main.cpp b/main.cpp index cf6369b..d4f344e 100644 --- a/main.cpp +++ b/main.cpp @@ -4,63 +4,76 @@ #include #include -float frand() { +static float frand() { return (float)rand() / RAND_MAX * 2 - 1; } -struct Star { - float px, py, pz; - float vx, vy, vz; - float mass; +struct alignas(64) Star { + float px[48], py[48], pz[48]; + float vx[48], vy[48], vz[48]; + float mass[48]; }; -std::vector stars; +Star stars; void init() { + #pragma GCC unroll 4 for (int i = 0; i < 48; i++) { - stars.push_back({ - frand(), frand(), frand(), - frand(), frand(), frand(), - frand() + 1, - }); + stars.px[i] = frand(); + stars.py[i] = frand(); + stars.pz[i] = frand(); + stars.vx[i] = frand(); + stars.vy[i] = frand(); + stars.vz[i] = frand(); + stars.mass[i] = frand() + 1; } } float G = 0.001; float eps = 0.001; +float eps_square = eps * eps; float dt = 0.01; void step() { - for (auto &star: stars) { - for (auto &other: stars) { - float dx = other.px - star.px; - float dy = other.py - star.py; - float dz = other.pz - star.pz; - float d2 = dx * dx + dy * dy + dz * dz + eps * eps; - d2 *= sqrt(d2); - star.vx += dx * other.mass * G * dt / d2; - star.vy += dy * other.mass * G * dt / d2; - star.vz += dz * other.mass * G * dt / d2; + #pragma GCC unroll 4 + for (int i = 0; i < 48; i++) { + float px = stars.px[i], py = stars.py[i], pz = stars.pz[i]; + float vx_plus = 0, vy_plus = 0, vz_plus = 0; + #pragma GCC unroll 4 + for (int j = 0; j < 48; j++) { + float dx = stars.px[j] - px; + float dy = stars.py[j] - py; + float dz = stars.pz[j] - pz; + float d2 = dx * dx + dy * dy + dz * dz + eps_square; + d2 *= std::sqrt(d2); + float value = stars.mass[j] * (G * dt) / d2; + vx_plus += dx * value; + vy_plus += dy * value; + vz_plus += dz * value; } + stars.vx[i] += vx_plus; + stars.vy[i] += vy_plus; + stars.vz[i] += vz_plus; } - for (auto &star: stars) { - star.px += star.vx * dt; - star.py += star.vy * dt; - star.pz += star.vz * dt; + #pragma GCC unroll 4 + for (int i = 0; i < 48; i++) { + stars.px[i] += stars.vx[i] * dt; + stars.py[i] += stars.vy[i] * dt; + stars.pz[i] += stars.vz[i] * dt; } } float calc() { float energy = 0; - for (auto &star: stars) { - float v2 = star.vx * star.vx + star.vy * star.vy + star.vz * star.vz; - energy += star.mass * v2 / 2; - for (auto &other: stars) { - float dx = other.px - star.px; - float dy = other.py - star.py; - float dz = other.pz - star.pz; + for (int i = 0; i < 48; i++) { + float v2 = stars.vx[i] * stars.vx[i] + stars.vy[i] * stars.vy[i] + stars.vz[i] * stars.vz[i]; + energy += stars.mass[i] * v2 / 2; + for (int j = 0; j < 48; j++) { + float dx = stars.px[j] - stars.px[i]; + float dy = stars.py[j] - stars.py[i]; + float dz = stars.pz[j] - stars.pz[i]; float d2 = dx * dx + dy * dy + dz * dz + eps * eps; - energy -= other.mass * star.mass * G / sqrt(d2) / 2; + energy -= stars.mass[j] * stars.mass[i] * G / sqrt(d2) / 2; } } return energy; @@ -79,6 +92,7 @@ int main() { init(); printf("Initial energy: %f\n", calc()); auto dt = benchmark([&] { + #pragma GCC unroll 64 for (int i = 0; i < 100000; i++) step(); });